Table of Contents

  • 1  result of undo
    • 1.1  Results in different city (undo target)
      • 1.1.1  different city (undo target)
      • 1.1.2  MAS: different city (new destination)
      • 1.1.3  MAS: different city (undo target )
      • 1.1.4  4 conditions: (undo2start, undoNot2start) x (different path, same path)
      • 1.1.5  4 conditions, but actually better after all (around the branching node)
      • 1.1.6  4 conditions: (undo2start, undoNot2start) x (different path, same path) - sequential undo (no need?)
      • 1.1.7  NOS: different city (undo target)
      • 1.1.8  Budget left: different city (undo target)
      • 1.1.9  RT
      • 1.1.10  Zigzagness
    • 1.2  Overlapped sequence
      • 1.2.1  overlapped sequence from all of the prev seqs.
      • 1.2.2  Histogram - length of overlapped sequences
      • 1.2.3  How often overlaps with previous paths
        • 1.2.3.1  (including length = 1; which means only a city overlapped)
        • 1.2.3.2  (length > 1; sequences)
      • 1.2.4  MAS (maximum achivable score) difference between two child nodes (bf and af undo; undo target)
        • 1.2.4.1  (including length = 1; which means only a city overlapped)
        • 1.2.4.2  (length > 1; sequences)
      • 1.2.5  MAS (maximum achivable score) difference between two end nodes (bf and af undo; new destination)
        • 1.2.5.1  (including length = 1; which means only a city overlapped)
        • 1.2.5.2  (length > 1; sequences)
      • 1.2.6  NOS
      • 1.2.7  leftover
      • 1.2.8  Zigzagness
        • 1.2.8.1  How zigzagness has changed due to overlapped seq
    • 1.3  Cagegorizing patterns
    • 1.4  Condition-wise difference in the same puzzle
      • 1.4.1  Zigzagness
    • 1.5  Overlapped cities not sequences (to be removed)
      • 1.5.1  overlaps from all of the prev seqs
In [5]:
import pandas as pd
import numpy as np

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

from scipy import stats
from scipy.stats import sem
from scipy.stats import shapiro
from scipy.stats import normaltest
from scipy.stats import ttest_rel,ttest_ind,wilcoxon

import statsmodels.api as sm
import pylab as py
In [6]:
home_dir = '/Users/dbao/google_drive_db'+'/road_construction/data/2022_online/'
home_dir = '../../../2022_online/'
map_dir = 'active_map/'
data_dir  = 'data/preprocessed'
out_dir = home_dir + 'figures/cogsci_2022/'
R_out_dir = home_dir + 'R_analysis_data/'
In [7]:
data_puzzle_level = pd.read_csv(R_out_dir +  'data.csv')
puzzleID_order_data = data_puzzle_level.sort_values(["subjects","puzzleID"])
data_choice_level = pd.read_csv(R_out_dir +  'choice_level/choicelevel_data.csv')

single_condition_data = puzzleID_order_data[puzzleID_order_data['condition']==1].copy()
single_condition_data = single_condition_data.reset_index()
sc_data_choice_level = data_choice_level[data_choice_level['condition']==1].reset_index()
In [8]:
# how many undo sequences? 
seq = 0
noseq = 0

seq2start = 0
seqNot2start = 0

tostart = 0
not2start = 0

for sub in range(100):
    dat_sbj  = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
    for pzi in np.unique(sc_data_choice_level['puzzleID']):
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()        
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index   
        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"]==1].index
        
        seq += np.sum(lastUndo_idx != firstUndo_idx)
        noseq += np.sum(lastUndo_idx == firstUndo_idx)
        
        lastUndo_idx = np.setdiff1d(lastUndo_idx,firstUndo_idx)
        seq2start += np.sum(dat_sbj_pzi["choice"][lastUndo_idx] == 0)
        seqNot2start += np.sum(dat_sbj_pzi["choice"][lastUndo_idx] > 2)
        
        tostart += np.sum(dat_sbj_pzi["currNumCities"][lastUndo_idx]==1)
        not2start += np.sum(dat_sbj_pzi["currNumCities"][lastUndo_idx]!=1)
In [9]:
print(seq)
print(noseq)
print(seq2start)
print(seqNot2start)
print(tostart)
print(not2start)
2094
566
1103
956
1103
991

result of undo¶

Results in different city (undo target)¶

different city (undo target)¶

In [10]:
undo_same_diff = []

for sub in range(100):
    dat_sbj  = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
    undo_same_diff_puzzle = []
    for pzi in np.unique(sc_data_choice_level['puzzleID']):
        same_puzzle = 0
        diff_puzzle = 0
        
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()        
     
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index
        
        idxx = np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) != np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])
        diff_puzzle += np.sum(idxx)
        idxx = np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) == np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])
        same_puzzle += np.sum(idxx)
        
        undo_same_diff_puzzle.append([same_puzzle, diff_puzzle])
    undo_same_diff_puzzle =  np.array(undo_same_diff_puzzle)
    undo_same_diff_puzzle =  np.sum(undo_same_diff_puzzle,axis=0)
    
    if np.sum(undo_same_diff_puzzle)==0:
        print(sub) # who does not undo 

    undo_same_diff.append(undo_same_diff_puzzle)
    
undo_same_diff = np.array(undo_same_diff)
20
25
31
46
53
66
67
76
84
97
In [11]:
# exclude some never undoing subjects
undo_same_diff = undo_same_diff[np.where(np.sum(np.array(undo_same_diff),axis=1)!=0),:]
undo_same_diff = undo_same_diff.squeeze()
undo_same_diff_p = undo_same_diff/ np.sum(undo_same_diff,axis = 1)[:,None]
In [12]:
np.mean(undo_same_diff_p,axis=0)
Out[12]:
array([0.11857317, 0.88142683])
In [13]:
## check variance
print(np.std(undo_same_diff_p[:,0], ddof=1))
print(np.std(undo_same_diff_p[:,1], ddof=1))
## check normality
sm.qqplot(undo_same_diff_p[:,0], line='s')
py.show()
sm.qqplot(undo_same_diff_p[:,1], line='s')
py.show()
0.14508927164018273
0.14508927164018273
In [14]:
stat1, p1 = wilcoxon(undo_same_diff_p[:,0],undo_same_diff_p[:,1])
print(stat1)
print(p1)
82.0
2.101861284114228e-15
In [15]:
%matplotlib notebook
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
# plt.figure(figsize=(5,3.75))
bb = plt.bar(range(2), np.mean(undo_same_diff_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.std(undo_same_diff_p,axis = 0)/np.sqrt(undo_same_diff_p.shape[0]))
plt.xticks([0,1], ['same','different'])
plt.ylabel('Proportion')
plt.xlabel('Undo target city') # next city after undoing

#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p1), ha='center', va='bottom', color=col, fontsize = 8)
fig.savefig(out_dir + 'proportion_undo_same_diff.png', dpi=600, bbox_inches='tight')

MAS: different city (new destination)¶

In [16]:
undo_for_better = []

for sub in range(100):
    dat_sbj  = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
    undo_for_puzzle = []
    for pzi in np.unique(sc_data_choice_level['puzzleID']):
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()        

        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"]==1].index
        path_bf_undo = dat_sbj_pzi["currMas"][firstUndo_idx-1] # the mas of the state before undo
        
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index
        path_af_undo = dat_sbj_pzi["currMas"][lastUndo_idx+1] # the mas of the state after undo
        
        # I think it doesn't mean it choose different path, it means at least once the chosen city is different after undo
        if np.any(np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) != np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])):
            
            idxx = np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) != np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])
            undo_for_puzzle.extend(np.sign(np.array(path_af_undo[idxx]) - np.array(path_bf_undo[idxx])))
#         else:
#             print('hup')
    undo_for_puzzle =  np.array(undo_for_puzzle)
    undo_for_better.append([np.sum(undo_for_puzzle<0), np.sum(undo_for_puzzle==0) ,np.sum(undo_for_puzzle>0)])
undo_for_better = np.array(undo_for_better)
In [17]:
# exclude some never undoing subjects
undo_for_better = undo_for_better[np.where(np.sum(np.array(undo_for_better),axis=1)!=0),:]
undo_for_better = undo_for_better.squeeze()
undo_for_better_p = undo_for_better/ np.sum(undo_for_better,axis = 1)[:,None]
In [18]:
stat12, p12 = wilcoxon(undo_for_better_p[:,0],undo_for_better_p[:,1])
stat23, p23 = wilcoxon(undo_for_better_p[:,1],undo_for_better_p[:,2])
print(stat12)
print(p12)
print(stat23)
print(p23)
241.5
1.574357589671037e-10
728.5
2.4395252313658338e-06
In [19]:
%matplotlib notebook
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
# plt.figure(figsize=(5,3.75))
bb = plt.bar(range(3), np.mean(undo_for_better_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.std(undo_for_better_p,axis = 0)/np.sqrt(undo_for_better_p.shape[0]))
plt.ylabel('Maximum achivable score after undo')
plt.xticks([0,1,2], ['Worse','Same','Better'])

#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)


x1, x2 = 1,2
y, h, col = np.max([bb[2].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p23), ha='center', va='bottom', color=col, fontsize = 8)

fig.savefig(out_dir + 'proportion_undo_same_better_worse.png', dpi=600, bbox_inches='tight')

MAS: different city (undo target )¶

In [20]:
undo_for_better = []

for sub in range(100):
    dat_sbj  = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
    undo_for_puzzle = []
    for pzi in np.unique(sc_data_choice_level['puzzleID']):
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()        

        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"]==1].index
        
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index
        path_bf_undo = dat_sbj_pzi["currMas"][lastUndo_idx-1] # the mas of the state before undo
        path_af_undo = dat_sbj_pzi["currMas"][lastUndo_idx+1] # the mas of the state after undo
        
        # I think it doesn't mean it choose different path, it means at least once the chosen city is different after undo
        if np.any(np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) != np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])):
            
            idxx = np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) != np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])
            undo_for_puzzle.extend(np.sign(np.array(path_af_undo[idxx]) - np.array(path_bf_undo[idxx])))
#         else:
#             print('hup')
    undo_for_puzzle =  np.array(undo_for_puzzle)
    undo_for_better.append([np.sum(undo_for_puzzle<0), np.sum(undo_for_puzzle==0) ,np.sum(undo_for_puzzle>0)])
undo_for_better = np.array(undo_for_better)
In [21]:
# exclude some never undoing subjects
undo_for_better = undo_for_better[np.where(np.sum(np.array(undo_for_better),axis=1)!=0),:]
undo_for_better = undo_for_better.squeeze()
undo_for_better_p = undo_for_better/ np.sum(undo_for_better,axis = 1)[:,None]
In [22]:
stat12, p12 = wilcoxon(undo_for_better_p[:,0],undo_for_better_p[:,1])
stat23, p23 = wilcoxon(undo_for_better_p[:,1],undo_for_better_p[:,2])
print(stat12)
print(p12)
print(stat23)
print(p23)
359.0
8.348934873555565e-08
1358.5
0.2096610964042439
In [23]:
%matplotlib inline
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
bb = plt.bar(range(3), np.mean(undo_for_better_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.std(undo_for_better_p,axis = 0)/np.sqrt(undo_for_better_p.shape[0]))
plt.ylabel('Maximum achivable score after undo')
plt.xticks([0,1,2], ['Worse','Same','Better'])

#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)


x1, x2 = 1,2
y, h, col = np.max([bb[2].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p23), ha='center', va='bottom', color=col, fontsize = 8)

fig.savefig(out_dir + 'proportion_MAS_same_better_worse.png', dpi=600, bbox_inches='tight')

4 conditions: (undo2start, undoNot2start) x (different path, same path)¶

In [24]:
undo_same_diff = []
undo_4condi = [] # (undo2start, same), (undo2start, different), (undoNot2start, same), (undoNot2start, different)

for sub in range(100):
    dat_sbj  = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
    undo_same_diff_puzzle = []
    undo_4condi_puzzle = []
    for pzi in np.unique(sc_data_choice_level['puzzleID']):
        same_puzzle = 0
        diff_puzzle = 0

        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()        

        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index
        choice_idxx = dat_sbj_pzi[dat_sbj_pzi["choice"]==0].index
        undo2start_idx = np.intersect1d(choice_idxx, lastUndo_idx)
        undoNot2start_idx = np.setdiff1d(lastUndo_idx, undo2start_idx)

        u2s_s = np.sum(np.array(dat_sbj_pzi["choice"][undo2start_idx-1]) == np.array(dat_sbj_pzi["choice"][undo2start_idx+1]))
        u2s_d = np.sum(np.array(dat_sbj_pzi["choice"][undo2start_idx-1]) != np.array(dat_sbj_pzi["choice"][undo2start_idx+1]))
        uns_s = np.sum(np.array(dat_sbj_pzi["choice"][undoNot2start_idx-1]) == np.array(dat_sbj_pzi["choice"][undoNot2start_idx+1]))
        uns_d = np.sum(np.array(dat_sbj_pzi["choice"][undoNot2start_idx-1]) != np.array(dat_sbj_pzi["choice"][undoNot2start_idx+1]))

        # undo_same_diff_puzzle.append([same_puzzle, diff_puzzle])
        undo_4condi_puzzle.append([u2s_s, u2s_d, uns_s, uns_d])

        #     undo_same_diff_puzzle =  np.array(undo_same_diff_puzzle)
        #     undo_same_diff_puzzle =  np.sum(undo_same_diff_puzzle,axis=0)
    
    undo_4condi_puzzle = np.array(undo_4condi_puzzle)
    undo_4condi_puzzle = np.sum(undo_4condi_puzzle,axis = 0)
    
    if np.sum(undo_4condi_puzzle)==0:
        print(sub) # who does not undo 

    undo_4condi.append(undo_4condi_puzzle)
    
undo_4condi = np.array(undo_4condi)
20
25
31
46
53
66
67
76
84
97
In [25]:
undo_4condi
undo_4condi_p = undo_4condi/ np.sum(undo_4condi,axis = 1)[:,None]
# print(undo_4condi)
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:2: RuntimeWarning: invalid value encountered in true_divide
  
In [26]:
stat12, p12 = wilcoxon(undo_4condi_p[:,0],undo_4condi_p[:,1])
stat34, p34 = wilcoxon(undo_4condi_p[:,2],undo_4condi_p[:,3])
print(stat12)
print(p12)
print(stat34)
print(p34)
20.5
3.4547372556015567e-16
105.5
4.563751503697428e-16
In [27]:
%matplotlib notebook
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(6,4.5))
plt.bar(range(4), np.nanmean(undo_4condi_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(undo_4condi_p,axis = 0)/np.sqrt(undo_4condi_p.shape[0]))
plt.xticks([0,0.5,1,2,2.5,3], ['same','\nUndo to start','different','same','\nUndo not to start','different'])
Out[27]:
([<matplotlib.axis.XTick at 0x7fed18e1e210>,
  <matplotlib.axis.XTick at 0x7fed2be9f5d0>,
  <matplotlib.axis.XTick at 0x7fed18d784d0>,
  <matplotlib.axis.XTick at 0x7fed2bea2350>,
  <matplotlib.axis.XTick at 0x7fed2bea2b50>,
  <matplotlib.axis.XTick at 0x7fed2beb34d0>],
 [Text(0.0, 0, 'same'),
  Text(0.5, 0, '\nUndo to start'),
  Text(1.0, 0, 'different'),
  Text(2.0, 0, 'same'),
  Text(2.5, 0, '\nUndo not to start'),
  Text(3.0, 0, 'different')])

4 conditions, but actually better after all (around the branching node)¶

In [28]:
undo_same_diff = []
undo_4condi = [] # (undo2start, same), (undo2start, different), (undoNot2start, same), (undoNot2start, different)
undo_better_2s = []
undo_better_n2s = []
for sub in range(100):
    dat_sbj  = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
    undo_same_diff_puzzle = []
    undo_4condi_puzzle = []
    undo_better_2s_puzzle = []
    undo_better_n2s_puzzle = []
    
    for pzi in np.unique(sc_data_choice_level['puzzleID']):
        same_puzzle = 0
        diff_puzzle = 0

        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()        

        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index
        choice_idxx = dat_sbj_pzi[dat_sbj_pzi["choice"]==0].index
        undo2start_idx = np.intersect1d(choice_idxx, lastUndo_idx)
        undoNot2start_idx = np.setdiff1d(lastUndo_idx, undo2start_idx)
        
        path_bf_2s_undo = dat_sbj_pzi["currMas"][undo2start_idx-1] # the mas of the state before undo
        path_af_2s_undo = dat_sbj_pzi["currMas"][undo2start_idx+1] # the mas of the state after undo
        if np.any(np.array(dat_sbj_pzi["choice"][undo2start_idx-1]) != np.array(dat_sbj_pzi["choice"][undo2start_idx+1])):
            idxx = np.array(dat_sbj_pzi["choice"][undo2start_idx-1]) != np.array(dat_sbj_pzi["choice"][undo2start_idx+1])
            undo_better_2s_puzzle.extend(np.sign(np.array(path_af_2s_undo[idxx]) - np.array(path_bf_2s_undo[idxx])))
            
        path_bf_n2s_undo = dat_sbj_pzi["currMas"][undoNot2start_idx-1] # the mas of the state before undo
        path_af_n2s_undo = dat_sbj_pzi["currMas"][undoNot2start_idx+1] # the mas of the state after undo
        if np.any(np.array(dat_sbj_pzi["choice"][undoNot2start_idx-1]) != np.array(dat_sbj_pzi["choice"][undoNot2start_idx+1])):
            idxx = np.array(dat_sbj_pzi["choice"][undoNot2start_idx-1]) != np.array(dat_sbj_pzi["choice"][undoNot2start_idx+1])
            undo_better_n2s_puzzle.extend(np.sign(np.array(path_af_n2s_undo[idxx]) - np.array(path_bf_n2s_undo[idxx])))
            

        u2s_s = np.sum(np.array(dat_sbj_pzi["choice"][undo2start_idx-1]) == np.array(dat_sbj_pzi["choice"][undo2start_idx+1]))
        u2s_d = np.sum(np.array(dat_sbj_pzi["choice"][undo2start_idx-1]) != np.array(dat_sbj_pzi["choice"][undo2start_idx+1]))
        uns_s = np.sum(np.array(dat_sbj_pzi["choice"][undoNot2start_idx-1]) == np.array(dat_sbj_pzi["choice"][undoNot2start_idx+1]))
        uns_d = np.sum(np.array(dat_sbj_pzi["choice"][undoNot2start_idx-1]) != np.array(dat_sbj_pzi["choice"][undoNot2start_idx+1]))

        
        # undo_same_diff_puzzle.append([same_puzzle, diff_puzzle])
        undo_4condi_puzzle.append([u2s_s, u2s_d, uns_s, uns_d])

        #     undo_same_diff_puzzle =  np.array(undo_same_diff_puzzle)
        #     undo_same_diff_puzzle =  np.sum(undo_same_diff_puzzle,axis=0)
    
    undo_better_2s_puzzle =  np.array(undo_better_2s_puzzle)
    undo_better_2s.append([np.sum(undo_better_2s_puzzle<0), np.sum(undo_better_2s_puzzle==0) ,np.sum(undo_better_2s_puzzle>0)])
    
    undo_better_n2s_puzzle =  np.array(undo_better_n2s_puzzle)
    undo_better_n2s.append([np.sum(undo_better_n2s_puzzle<0), np.sum(undo_better_n2s_puzzle==0) ,np.sum(undo_better_n2s_puzzle>0)])
    
    
    undo_4condi_puzzle = np.array(undo_4condi_puzzle)
    undo_4condi_puzzle = np.sum(undo_4condi_puzzle,axis = 0)
    
    if np.sum(undo_4condi_puzzle)==0:
        print(sub) # who does not undo 

    undo_4condi.append(undo_4condi_puzzle)
    
undo_4condi = np.array(undo_4condi)

undo_better_2s = np.array(undo_better_2s)
undo_better_n2s = np.array(undo_better_n2s)
20
25
31
46
53
66
67
76
84
97
In [29]:
undo_better_2s = undo_better_2s[np.where(np.sum(np.array(undo_better_2s),axis=1)!=0),:]
undo_better_2s = undo_better_2s.squeeze()

undo_better_2s_p = undo_better_2s/ np.sum(undo_better_2s,axis = 1)[:,None]

undo_better_n2s = undo_better_n2s[np.where(np.sum(np.array(undo_better_n2s),axis=1)!=0),:]
undo_better_n2s = undo_better_n2s.squeeze()

undo_better_n2s_p = undo_better_n2s/ np.sum(undo_better_n2s,axis = 1)[:,None]
In [30]:
%matplotlib notebook
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig , axes = plt.subplots(1,2,figsize=(6,4.5))

bb = axes[0].bar(range(3), np.nanmean(undo_better_2s_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(undo_better_2s_p,axis = 0)/np.sqrt(undo_better_2s_p.shape[0]))
axes[0].set_xticks([0,1,2], ['worse','same\nUndo to start','better'])
axes[0].set_ylim(0,.7)

stat12, p12 = wilcoxon(undo_better_2s_p[:,0],undo_better_2s_p[:,1])
stat23, p23 = wilcoxon(undo_better_2s_p[:,1],undo_better_2s_p[:,2])
print(stat12)
print(p12)
print(stat23)
print(p23)

#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
axes[0].plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
axes[0].text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)

x1, x2 = 1,2
y, h, col = np.max([bb[1].get_height(),bb[2].get_height()]) + 0.1, 0.05, 'k'
axes[0].plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
axes[0].text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p23), ha='center', va='bottom', color=col, fontsize = 8)



bb2 = axes[1].bar(range(3), np.nanmean(undo_better_n2s_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(undo_better_n2s_p,axis = 0)/np.sqrt(undo_better_n2s_p.shape[0]))
axes[1].set_xticks([0,1,2], ['worse','same\nUndo not to start','better'])
axes[1].set_ylim(0,.7)

stat12, p12 = wilcoxon(undo_better_n2s_p[:,0],undo_better_n2s_p[:,1])
stat23, p23 = wilcoxon(undo_better_n2s_p[:,1],undo_better_n2s_p[:,2])
print(stat12)
print(p12)
print(stat23)
print(p23)

#statistics
x1, x2 = 0,1
y, h, col = np.max([bb2[0].get_height(),bb2[1].get_height()]) + 0.1, 0.05, 'k'
axes[1].plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
axes[1].text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)

x1, x2 = 1,2
y, h, col = np.max([bb2[1].get_height(),bb2[2].get_height()]) + 0.1, 0.05, 'k'
axes[1].plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
axes[1].text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p23), ha='center', va='bottom', color=col, fontsize = 8)
479.0
0.014719115686298584
431.0
2.721493900890657e-05
330.0
2.0015475895461355e-08
1189.0
0.21222251859233165
Out[30]:
Text(1.5, 0.5927749883863996, '$p = 0.212223$')

4 conditions: (undo2start, undoNot2start) x (different path, same path) - sequential undo (no need?)¶

In [31]:
undo_same_diff = []
undo_4condi = [] # (undo2start, same), (undo2start, different), (undoNot2start, same), (undoNot2start, different)

for sub in range(100):
    dat_sbj  = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
    undo_same_diff_puzzle = []
    undo_4condi_puzzle = []
    for pzi in np.unique(sc_data_choice_level['puzzleID']):
        same_puzzle = 0
        diff_puzzle = 0

        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()        
        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"]==1].index
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index
        lastUndo_idx = np.setdiff1d(lastUndo_idx, firstUndo_idx) # take only accounts for sequential undoes
        choice_idxx = dat_sbj_pzi[dat_sbj_pzi["choice"]==0].index
        undo2start_idx = np.intersect1d(choice_idxx, lastUndo_idx)
        undoNot2start_idx = np.setdiff1d(lastUndo_idx, undo2start_idx)

        u2s_s = np.sum(np.array(dat_sbj_pzi["choice"][undo2start_idx-1]) == np.array(dat_sbj_pzi["choice"][undo2start_idx+1]))
        u2s_d = np.sum(np.array(dat_sbj_pzi["choice"][undo2start_idx-1]) != np.array(dat_sbj_pzi["choice"][undo2start_idx+1]))
        uns_s = np.sum(np.array(dat_sbj_pzi["choice"][undoNot2start_idx-1]) == np.array(dat_sbj_pzi["choice"][undoNot2start_idx+1]))
        uns_d = np.sum(np.array(dat_sbj_pzi["choice"][undoNot2start_idx-1]) != np.array(dat_sbj_pzi["choice"][undoNot2start_idx+1]))

        # undo_same_diff_puzzle.append([same_puzzle, diff_puzzle])
        undo_4condi_puzzle.append([u2s_s, u2s_d, uns_s, uns_d])

        #     undo_same_diff_puzzle =  np.array(undo_same_diff_puzzle)
        #     undo_same_diff_puzzle =  np.sum(undo_same_diff_puzzle,axis=0)
    
    undo_4condi_puzzle = np.array(undo_4condi_puzzle)
    undo_4condi_puzzle = np.sum(undo_4condi_puzzle,axis = 0)
    
    if np.sum(undo_4condi_puzzle)==0:
        print(sub) # who does not undo 

    undo_4condi.append(undo_4condi_puzzle)
    
undo_4condi = np.array(undo_4condi)
8
12
20
25
26
27
28
31
33
34
46
53
57
66
67
76
84
93
94
97
In [32]:
undo_4condi
undo_4condi_p = undo_4condi/ np.sum(undo_4condi,axis = 1)[:,None]
# print(undo_4condi)
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:2: RuntimeWarning: invalid value encountered in true_divide
  
In [33]:
stat12, p12 = wilcoxon(undo_4condi_p[:,0],undo_4condi_p[:,1])
stat34, p34 = wilcoxon(undo_4condi_p[:,2],undo_4condi_p[:,3])
print(stat12)
print(p12)
print(stat34)
print(p34)
3.5
4.245167166785722e-17
11.0
5.413752964972779e-17
In [34]:
%matplotlib notebook
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(6,4.5))
plt.bar(range(4), np.nanmean(undo_4condi_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(undo_4condi_p,axis = 0)/np.sqrt(undo_4condi_p.shape[0]))
plt.xticks([0,0.5,1,2,2.5,3], ['same','\nUndo to start','different','same','\nUndo not to start','different'])
plt.ylabel('Maximum achivable score after undo')
# plt.xlabel('After undoing')
Out[34]:
Text(0, 0.5, 'Maximum achivable score after undo')

NOS: different city (undo target)¶

In [35]:
undo_nos = []

for sub in range(100):
    dat_sbj  = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
    undo_nos_puzzle = []
    for pzi in np.unique(sc_data_choice_level['puzzleID']):
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()        

        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"]==1].index
        
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index
        path_bf_undo = dat_sbj_pzi["currNos"][lastUndo_idx-1] # the mas of the state before undo
        path_af_undo = dat_sbj_pzi["currNos"][lastUndo_idx+1] # the mas of the state after undo
        
        # I think it doesn't mean it choose different path, it means at least once the chosen city is different after undo
        if np.any(np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) != np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])):
            
            idxx = np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) != np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])
            undo_nos_puzzle.extend(np.sign(np.array(path_af_undo[idxx]) - np.array(path_bf_undo[idxx])))
#         else:
#             print('hup')
    undo_nos_puzzle =  np.array(undo_nos_puzzle)
    undo_nos.append([np.sum(undo_nos_puzzle<0), np.sum(undo_nos_puzzle==0) ,np.sum(undo_nos_puzzle>0)])
undo_nos = np.array(undo_nos)
In [36]:
# exclude some never undoing subjects
undo_nos = undo_nos[np.where(np.sum(np.array(undo_nos),axis=1)!=0),:]
undo_nos = undo_nos.squeeze()
undo_nos_p = undo_nos/ np.sum(undo_nos,axis = 1)[:,None]
In [37]:
stat13, p13 = wilcoxon(undo_nos_p[:,0],undo_nos_p[:,2])
print(stat13)
print(p13)
982.0
0.008342293270650666
In [38]:
%matplotlib notebook
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
# plt.figure(figsize=(5,3.75))
bb = plt.bar(range(3), np.mean(undo_nos_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.std(undo_nos_p,axis = 0)/np.sqrt(undo_nos_p.shape[0]))
plt.ylabel('Number of optimal solutions after undo')
plt.xticks([0,1,2], ['Decreased','Same','Increased']) 

# means they result in less confusing path? 
# less difficult path? 



#statistics
x1, x2 = 0,2
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)

fig.savefig(out_dir + 'proportion_NOS_same_better_worse.png', dpi=600, bbox_inches='tight')

Budget left: different city (undo target)¶

In [39]:
undo_budget = []

for sub in range(100):
    dat_sbj  = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
    undo_budget_puzzle = []
    for pzi in np.unique(sc_data_choice_level['puzzleID']):
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()        
        
        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"]==1].index
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index
        
        submit_idx =  pd.Int64Index([len(dat_sbj_pzi)-1])
        
        t_idx = (firstUndo_idx[1:]-1).to_list()
        t_idx.append(submit_idx.item())
        path_bf_undo = dat_sbj_pzi["leftover"][firstUndo_idx-1]
        path_af_undo = dat_sbj_pzi["leftover"][t_idx]
        
        # I think it doesn't mean it choose different path, it means at least once the chosen city is different after undo
        if np.any(np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) != np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])):
            
            idxx = np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) != np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])
            undo_budget_puzzle.extend(np.sign(np.array(path_af_undo[idxx]) - np.array(path_bf_undo[idxx])))
#         else:
#             print('hup')
    undo_budget_puzzle =  np.array(undo_budget_puzzle)
    undo_budget.append([np.sum(undo_budget_puzzle<0), np.sum(undo_budget_puzzle==0) ,np.sum(undo_budget_puzzle>0)])
undo_budget = np.array(undo_budget)
In [40]:
# exclude some never undoing subjects
undo_budget = undo_budget[np.where(np.sum(np.array(undo_budget),axis=1)!=0),:]
undo_budget = undo_budget.squeeze()
undo_budget = undo_budget[:,(0,2)]
undo_budget_p = undo_budget/ np.sum(undo_budget,axis = 1)[:,None]
In [41]:
stat12, p12 = wilcoxon(undo_budget_p[:,0],undo_budget_p[:,1])
print(stat12)
print(p12)
327.5
1.7566375349786966e-11
In [42]:
%matplotlib notebook
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
# plt.figure(figsize=(5,3.75))
bb = plt.bar(range(2), np.mean(undo_budget_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.std(undo_budget_p,axis = 0)/np.sqrt(undo_budget_p.shape[0]))
plt.ylabel('Number of optimal solutions after undo')
plt.xticks([0,1], ['Decreased','Increased']) # increased nos means that its much easier?



#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)

fig.savefig(out_dir + 'proportion_Nos_increased_decreased.png', dpi=600, bbox_inches='tight')

RT¶

In [43]:
RT_diff_around = []
RT_around = []

for sub in range(100):
    dat_sbj  = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
    RT_around_for_puzzle = np.empty((0,2))
    RT_diff_around_for_puzzle = []
    for pzi in np.unique(sc_data_choice_level['puzzleID']):
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()        

        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"]==1].index
        path_bf_undo = dat_sbj_pzi["RT"][firstUndo_idx-1] # the mas of the state before undo
        
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index
        path_af_undo = dat_sbj_pzi["RT"][lastUndo_idx+1] # the mas of the state after undo
        if len(firstUndo_idx) is not 0:
            RT_around_for_puzzle = np.concatenate((RT_around_for_puzzle, np.array([np.array(path_bf_undo), np.array(path_af_undo)]).transpose()),axis=0)

        # I think it doesn't mean it choose different path, it means at least once the chosen city is different after undo
        if np.any(np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) != np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])):
            
            idxx = np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) != np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])
            RT_diff_around_for_puzzle.extend(np.array(path_af_undo[idxx]) - np.array(path_bf_undo[idxx]))
#         else
#             print('hup')
    RT_diff_around_for_puzzle =  np.array(RT_diff_around_for_puzzle)
    RT_around_for_puzzle =  np.mean(RT_around_for_puzzle,axis=0)
    RT_diff_around.append([np.sum(RT_diff_around_for_puzzle<0) ,np.sum(RT_diff_around_for_puzzle>0)])
    RT_around.append(RT_around_for_puzzle)
    
RT_diff_around = np.array(RT_diff_around)
RT_around = np.array(RT_around)
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/numpy/core/fromnumeric.py:3441: RuntimeWarning: Mean of empty slice.
  out=out, **kwargs)
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/numpy/core/_methods.py:182: RuntimeWarning: invalid value encountered in true_divide
  ret, rcount, out=ret, casting='unsafe', subok=False)
In [44]:
# exclude some never undoing subjects
RT_diff_around_p = RT_diff_around/ np.sum(RT_diff_around,axis = 1)[:,None]
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:2: RuntimeWarning: invalid value encountered in true_divide
  
In [45]:
print(np.nanmean(RT_around,axis=0))
print(np.nanmean(RT_diff_around_p,axis=0))
[1741.89441925 2559.44066795]
[0.29879522 0.70120478]
In [46]:
stat12, p12 = wilcoxon(RT_diff_around_p[:,0],RT_diff_around_p[:,1])
print(stat12)
print(p12)
300.0
1.222315890309548e-13
In [47]:
%matplotlib notebook
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(6,4.5))
# plt.figure(figsize=(5,3.75))
bb = plt.bar(range(2), np.nanmean(RT_diff_around_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(RT_diff_around_p,axis = 0)/np.sqrt(RT_diff_around_p.shape[0]))
plt.ylabel('Reaction time')
plt.xticks([0,1], ['Decreased','Increased']) # increased nos means that its much easier?



#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
Out[47]:
Text(0.5, 0.8512047834180481, '$p = 0.000000$')
In [48]:
RT_around = RT_around/1000
In [49]:
stat12, p12 = wilcoxon(RT_around[:,0],RT_around[:,1])
print(stat12)
print(p12)
589.0
2.802404040905235e-11
In [50]:
%matplotlib notebook
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(6,4.5))
# plt.figure(figsize=(5,3.75))
bb = plt.bar(range(2), np.nanmean(RT_around,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(RT_around,axis = 0)/np.sqrt(RT_around.shape[0]))
plt.ylabel('Reaction time (s)')
plt.xticks([0,1], ['before','after']) # increased nos means that its much easier?



#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.3, 0.03,  'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
Out[50]:
Text(0.5, 2.8894406679458173, '$p = 0.000000$')

Zigzagness¶

In [51]:
import copy
def get_tortuosity(pathList, pzi = 15):
    def cal_dist(xy1, xy2):
        return np.sqrt(np.sum((np.array(xy1)-np.array(xy2))**2))

    pathList_ = copy.deepcopy(pathList)

    ### for the def
    import json
    with open('./util/basicMap.json','rb') as f:
        basic_map = json.load(f)
    tortuosity = []
    ###

    map_pz = basic_map[pzi]

    import numpy as np
    for _ in range(len(pathList_)):
        path = pathList_.pop(0)
        if len(path)>1:
            ct_init = path.pop(0)
#             print(path)
#             print('init{}_tgt{}'.format(ct_init, path[-1]))
#             print('map_init{}_map_tgt{}'.format(map_pz['xy'][ct_init], map_pz['xy'][path[-1]]))
            d_straight = cal_dist(map_pz['xy'][ct_init], map_pz['xy'][path[-1]])
            d_zigzag = 0
            for _ in range(len(path)):
                ct_tgt = path.pop(0)
                d_zigzag += cal_dist(map_pz['xy'][ct_init], map_pz['xy'][ct_tgt])
                ct_init = ct_tgt
            tortuosity.append([d_zigzag, d_straight])
        else:
            tortuosity.append([np.nan, np.nan])
    return np.array(tortuosity)

import difflib

def get_overlap(s1, s2):
    s = difflib.SequenceMatcher(None, s1, s2)
    pos_a, pos_b, size = s.find_longest_match(0, len(s1), 0, len(s2)) 
    return s1[pos_a:pos_a+size]
 
def get_overlaplist(l1,l2,minlen=1):
    matched_seq = []
    for i1 in range(len(l1)):
#         print('l1:{}'.format(i1))
        matches = [i2 for i2 in range(len(l2)) if l2[i2] == l1[i1]]
#         print(matches)
        for m in matches:
            matched_seq_ = []
            i_a = 0
            while ((i1+i_a)<len(l1)) and ((m+i_a)< len(l2)):
                if l1[i1+i_a]==l2[m+i_a]:
                    matched_seq_.append(l1[i1+i_a])
                    i_a += 1
                else:
                    break
            matched_seq.append(matched_seq_)
            
    idx = np.argsort([len(seq) for seq in matched_seq])[::-1]
#     print(idx)

    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
    matched_seq = seqs_.copy()
    idx = np.argsort([len(seq) for seq in matched_seq])
    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
                
    seqs = []
    for seq in seqs_:
#         print(len(seq))
        if not len(seq)<minlen:
            seqs.append(seq)
            
    return seqs

def trimmer(matched_seq,minlen=1):
    idx = np.argsort([len(seq) for seq in matched_seq])[::-1]
#     print(idx)

    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
    matched_seq = seqs_.copy()
    idx = np.argsort([len(seq) for seq in matched_seq])
    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
                
    seqs = []
    for seq in seqs_:
#         print(len(seq))
        if not len(seq)<minlen:
            seqs.append(seq)
            
    return seqs
In [52]:
undo_zigzag = []
undo_zigzag_diff =[]
undo_zigzag_diff_inlen =[]

undo_zigzag_normalized = []


overlap_seq_inorder = []
overlap_seq_inorder_inlen=[]

length_seq = []
undo_length_bfaf = []
undo_length_af = []
undo_length_bf = []

for sub in range(100):
    dat_sbj  = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
    undo_zigzag_puzzle = []
    undo_zigzag_normalized_puzzle = []
    undo_zigzag_diff_for_puzzle = []
    undo_zigzag_diff_inlen_for_puzzle = []
    undo_zigzag_normalized_puzzle = []
    overlap_seq_inorder_for_puzzle = []
    overlap_seq_inorder_inlen_for_puzzle=[]
    undo_length_bf_puzzle=[]
    undo_length_af_puzzle=[]
    
    for pzi in np.unique(sc_data_choice_level['puzzleID']):
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()        
 
        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"]==1].index
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index

        submit_idx =  pd.Int64Index([len(dat_sbj_pzi)-1])
        
        
        if len(firstUndo_idx)>0:
            if len(firstUndo_idx)==1: #
                seq_bf = dat_sbj_pzi["path"][firstUndo_idx-1]
                seq_af = dat_sbj_pzi["path"][submit_idx]
                seq_im = dat_sbj_pzi["path"][lastUndo_idx]
            else:
                seq_bf = dat_sbj_pzi["path"][firstUndo_idx-1]
                t_idx = (firstUndo_idx[1:]-1).to_list()
                t_idx.append(submit_idx.item())
                seq_af = dat_sbj_pzi["path"][t_idx]
                seq_im = dat_sbj_pzi["path"][lastUndo_idx]

            seq_im = seq_im.reset_index()
            pathStr_im = [seq_im.loc[i].path.strip('[').strip(']') for i in range(len(seq_im))]
            pathList_im = [[int(i) for i in pathStr_im[j].split(', ')]  for j in range(len(pathStr_im))]
            pathStr_im = [" ".join([str(a) for a in pathList_im[j]]) for j in range(len(seq_im))]

            seq_bf = seq_bf.reset_index()
            pathStr_bf = [seq_bf.loc[i].path.strip('[').strip(']') for i in range(len(seq_bf))]
            pathList_bf = [[int(i) for i in pathStr_bf[j].split(', ')]  for j in range(len(pathStr_bf))]
            pathStr_bf = [" ".join([str(a) for a in pathList_bf[j]]) for j in range(len(seq_bf))]

            seq_af = seq_af.reset_index()
            pathStr_af = [seq_af.loc[i].path.strip('[').strip(']') for i in range(len(seq_af))]
            pathList_af = [[int(i) for i in pathStr_af[j].split(', ')]  for j in range(len(pathStr_af))]
            pathStr_af = [" ".join([str(a) for a in pathList_af[j]]) for j in range(len(seq_af))]

            pathList_bfim = [np.setdiff1d(np.array(pathList_bf[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_bf))]
            pathStr_bfim = [" ".join([str(a) for a in pathList_bfim[j]]) for j in range(len(pathList_im))]

            pathList_afim = [np.setdiff1d(np.array(pathList_af[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_af))]
            pathStr_afim = [" ".join([str(a) for a in pathList_afim[j]]) for j in range(len(pathList_im))]

            tor_bf = get_tortuosity(pathList_bfim, pzi)
            path_bf_undo = (tor_bf[:,0]/tor_bf[:,1])
            tor_af = get_tortuosity(pathList_afim, pzi)
            path_af_undo = (tor_af[:,0]/tor_af[:,1])
            
            len_path_bf = np.array([len(path) for path in pathList_bfim])
            len_path_af = np.array([len(path) for path in pathList_afim])
            

            
            
            zigzag_diff = np.array(path_af_undo) - np.array(path_bf_undo)
            
            overlap_seq_2 = []
            zigzag_diff_2 = []
            seq_inorder_2 = []
            
            for i in range(len(seq_af)):
                temp = []
                for j in range(i+1):
            #         temp.append(get_overlaplist(pathList_afim[i], pathList_bfim[j]))
                    temptemp = get_overlaplist(pathList_afim[i], pathList_bfim[j])
                    if not len(temptemp)==0:
                        temp.extend(temptemp)

                temp = trimmer(temp)
                if len(temp)==0:
                    temp.append([])

                seq_inorder_2.append(temp)

            #     len_seq_inorder_2 = [len(s) for s in temp if len(s)!=0]
                len_seq_inorder_2 =[]
                tempzigd = []
                for s in temp:
#                     print('*')
                    if len(s)==0:
                        len_seq_inorder_2.append(0)
                    else:
                        len_seq_inorder_2.append(len(s))
                        
                    tempzigd.append(zigzag_diff[i])
                    
                        
                overlap_seq_2.extend(len_seq_inorder_2)
                zigzag_diff_2.extend(tempzigd)
            
            # I think it doesn't mean it choose different path, it means at least once the chosen city is different after undo
            if np.any(np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) != np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])):
                idxx = np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) != np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])
                undo_zigzag_puzzle.extend(np.sign(np.array(path_af_undo[idxx]) - np.array(path_bf_undo[idxx])))
                
                undo_zigzag_normalized_puzzle.extend(np.sign(np.array(path_af_undo[idxx])/len_path_af[idxx] - np.array(path_bf_undo[idxx])/len_path_bf[idxx]))

                
                undo_length_bf_puzzle.extend(len_path_bf[idxx].tolist())
                undo_length_af_puzzle.extend(len_path_af[idxx].tolist())
                
                overlap_seq_inorder_for_puzzle.append([np.sum(np.array(overlap_seq_2)==0), np.sum(np.array(overlap_seq_2)!=0)])
                overlap_seq_inorder_inlen_for_puzzle.append([np.sum(np.array(overlap_seq_2)==0), np.sum(np.array(overlap_seq_2)==1), 
                                                             np.sum(np.array(overlap_seq_2)==2), np.sum(np.array(overlap_seq_2)>2),])

                length_seq.extend(overlap_seq_2)
                
                undo_zigzag_diff_for_puzzle.append([np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)==0]) , np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)!=0]) ] )
                undo_zigzag_diff_inlen_for_puzzle.append([np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)==0]), np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)==1]), 
                                                             np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)==2]), np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)>2]),])

            

#         else:
#             overlap_seq_inorder_for_puzzle.append([np.nan,np.nan])
#             overlap_seq_inorder_inlen_for_puzzle.append([np.nan,np.nan,np.nan,np.nan])
    
    overlap_seq_inorder_for_puzzle = np.array(overlap_seq_inorder_for_puzzle)
    temp = overlap_seq_inorder_for_puzzle.copy()
#     overlap_seq_inorder.append(np.nansum(temp,axis=0))
#     print('*'*10)
    if len(temp) == 0:
        overlap_seq_inorder.append(np.zeros((2)))
    else:
        overlap_seq_inorder.append(np.nansum(temp,axis=0))


    overlap_seq_inorder_inlen_for_puzzle=np.array(overlap_seq_inorder_inlen_for_puzzle)
    temp = overlap_seq_inorder_inlen_for_puzzle.copy()
    
    if len(temp) == 0:
        overlap_seq_inorder_inlen.append(np.zeros((4)))
    else:
        overlap_seq_inorder_inlen.append(np.nansum(temp,axis=0))
        
    undo_zigzag_puzzle =  np.array(undo_zigzag_puzzle)
    undo_zigzag_normalized_puzzle = np.array(undo_zigzag_normalized_puzzle)
#     undo_zigzag.append([np.sum(undo_zigzag_puzzle<0), np.sum(undo_zigzag_puzzle==0) ,np.sum(undo_zigzag_puzzle>0)])
    undo_zigzag.append([np.sum(undo_zigzag_puzzle<0) ,np.sum(undo_zigzag_puzzle>0)])
    undo_zigzag_normalized.append([np.sum(undo_zigzag_normalized_puzzle<0) ,np.sum(undo_zigzag_normalized_puzzle>0)])
    
    
    undo_length_af.extend(undo_length_af_puzzle)
    undo_length_bf.extend(undo_length_bf_puzzle)
    # undo_length_bfaf.append(np.array([undo_length_bf_puzzle, undo_length_af_puzzle]))


    undo_zigzag_diff_for_puzzle=np.array(undo_zigzag_diff_for_puzzle)
    temp = undo_zigzag_diff_for_puzzle.copy()
    if len(temp) == 0:
        undo_zigzag_diff.append(np.zeros((2)))
    else:
        undo_zigzag_diff.append(np.nansum(temp,axis=0))
                                
                                
                                
    undo_zigzag_diff_inlen_for_puzzle=np.array(undo_zigzag_diff_inlen_for_puzzle)
    temp = undo_zigzag_diff_inlen_for_puzzle.copy()
    
    if len(temp) == 0:
        undo_zigzag_diff_inlen.append(np.zeros((4)))
    else:
        undo_zigzag_diff_inlen.append(np.nansum(temp,axis=0))
    
    
undo_zigzag = np.array(undo_zigzag)
undo_zigzag_normalized = np.array(undo_zigzag_normalized)
undo_zigzag_diff = np.array(undo_zigzag_diff)
undo_zigzag_diff_inlen = np.array(undo_zigzag_diff_inlen)

overlap_seq_inorder = np.array(overlap_seq_inorder)
overlap_seq_inorder_inlen = np.array(overlap_seq_inorder_inlen)
undo_length_bfaf = np.array([undo_length_bf, undo_length_af])
In [53]:
overlap_seq_inorder_inlen_minlen2 = overlap_seq_inorder_inlen.copy()
overlap_seq_inorder_inlen_minlen2 = overlap_seq_inorder_inlen_minlen2[:,np.setdiff1d(range(overlap_seq_inorder_inlen_minlen2.shape[1]),1)]
overlap_seq_inorder_minlen2 = overlap_seq_inorder_inlen_minlen2.copy()
overlap_seq_inorder_minlen2 = np.array([overlap_seq_inorder_minlen2[:,0], np.sum(overlap_seq_inorder_minlen2[:,1:],axis=1) ]).transpose()

overlap_seq_inorder_p = overlap_seq_inorder/np.sum(overlap_seq_inorder,axis=1)[:,None]
overlap_seq_inorder_inlen_p = overlap_seq_inorder_inlen/np.sum(overlap_seq_inorder_inlen,axis=1)[:,None]


overlap_seq_inorder_minlen2_p = overlap_seq_inorder_minlen2/np.sum(overlap_seq_inorder_minlen2,axis=1)[:,None]
overlap_seq_inorder_inlen_minlen2_p = overlap_seq_inorder_inlen_minlen2/np.sum(overlap_seq_inorder_inlen_minlen2,axis=1)[:,None]
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:6: RuntimeWarning: invalid value encountered in true_divide
  
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide
  import sys
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in true_divide
  # Remove the CWD from sys.path while we load stuff.
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:11: RuntimeWarning: invalid value encountered in true_divide
  # This is added back by InteractiveShellApp.init_path()
In [54]:
undo_zigzag_diff_inlen_minlen2 = undo_zigzag_diff_inlen.copy()
undo_zigzag_diff_inlen_minlen2 = undo_zigzag_diff_inlen_minlen2[:,np.setdiff1d(range(undo_zigzag_diff_inlen_minlen2.shape[1]),1)]
undo_zigzag_diff_minlen2 = undo_zigzag_diff_inlen_minlen2.copy()
undo_zigzag_diff_minlen2 = np.array([undo_zigzag_diff_minlen2[:,0], np.sum(undo_zigzag_diff_minlen2[:,1:],axis=1) ]).transpose()

undo_zigzag_diff_minlen2_p = undo_zigzag_diff_minlen2/np.sum(undo_zigzag_diff_minlen2,axis=1)[:,None]
undo_zigzag_diff_inlen_minlen2_p = undo_zigzag_diff_inlen_minlen2/np.sum(undo_zigzag_diff_inlen_minlen2,axis=1)[:,None]
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:6: RuntimeWarning: invalid value encountered in true_divide
  
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide
  import sys
In [55]:
# exclude some never undoing subjects
undo_zigzag = undo_zigzag[np.where(np.sum(np.array(undo_zigzag),axis=1)!=0),:]
undo_zigzag = undo_zigzag.squeeze()
undo_zigzag_p = undo_zigzag/ np.sum(undo_zigzag,axis = 1)[:,None]


undo_zigzag_normalized = undo_zigzag_normalized[np.where(np.sum(np.array(undo_zigzag_normalized),axis=1)!=0),:]
undo_zigzag_normalized = undo_zigzag_normalized.squeeze()
undo_zigzag_normalized_p = undo_zigzag_normalized/ np.sum(undo_zigzag_normalized,axis = 1)[:,None]
In [56]:
stat12, p12 = wilcoxon(undo_zigzag_p[:,0],undo_zigzag_p[:,1])
print(stat12)
print(p12)
125.0
2.4502913299164797e-11
In [57]:
stat12, p12 = wilcoxon(undo_zigzag_normalized_p[:,0],undo_zigzag_normalized_p[:,1])
print(stat12)
print(p12)
536.0
9.889191051818236e-05
In [58]:
%matplotlib inline
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
# plt.figure(figsize=(5,3.75))
bb = plt.bar(range(2), np.mean(undo_zigzag_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.std(undo_zigzag_p,axis = 0)/np.sqrt(undo_zigzag_p.shape[0]))
plt.ylabel('Zigzag-ness after undo')
plt.xticks([0,1], ['Decreased','Increased']) 

# means they got into the path that has better mas, and less confusing?




#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)


fig.savefig(out_dir + 'proportion_zigzagness_increased_decreased.png', dpi=600, bbox_inches='tight')
In [59]:
%matplotlib inline
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
# plt.figure(figsize=(5,3.75))
bb = plt.bar(range(2), np.mean(undo_zigzag_normalized_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.std(undo_zigzag_normalized_p,axis = 0)/np.sqrt(undo_zigzag_normalized_p.shape[0]))
plt.ylabel('Zigzag-ness after undo (divided by length')
plt.xticks([0,1], ['Decreased','Increased']) 

# means they got into the path that has better mas, and less confusing?




#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)


fig.savefig(out_dir + 'proportion_normalized_zigzagness_increased_decreased.png', dpi=600, bbox_inches='tight')

Overlapped sequence¶

overlapped sequence from all of the prev seqs.¶

In [60]:
import difflib

def get_overlap(s1, s2):
    s = difflib.SequenceMatcher(None, s1, s2)
    pos_a, pos_b, size = s.find_longest_match(0, len(s1), 0, len(s2)) 
    return s1[pos_a:pos_a+size]
 
def get_overlaplist(l1,l2,minlen=1):
    matched_seq = []
    for i1 in range(len(l1)):
#         print('l1:{}'.format(i1))
        matches = [i2 for i2 in range(len(l2)) if l2[i2] == l1[i1]]
#         print(matches)
        for m in matches:
            matched_seq_ = []
            i_a = 0
            while ((i1+i_a)<len(l1)) and ((m+i_a)< len(l2)):
                if l1[i1+i_a]==l2[m+i_a]:
                    matched_seq_.append(l1[i1+i_a])
                    i_a += 1
                else:
                    break
            matched_seq.append(matched_seq_)
            
    idx = np.argsort([len(seq) for seq in matched_seq])[::-1]
#     print(idx)

    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
    matched_seq = seqs_.copy()
    idx = np.argsort([len(seq) for seq in matched_seq])
    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
                
    seqs = []
    for seq in seqs_:
#         print(len(seq))
        if not len(seq)<minlen:
            seqs.append(seq)
            
    return seqs

def trimmer(matched_seq,minlen=1):
    idx = np.argsort([len(seq) for seq in matched_seq])[::-1]
#     print(idx)

    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
    matched_seq = seqs_.copy()
    idx = np.argsort([len(seq) for seq in matched_seq])
    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
                
    seqs = []
    for seq in seqs_:
#         print(len(seq))
        if not len(seq)<minlen:
            seqs.append(seq)
            
    return seqs
In [61]:
overlap_seq_inorder = []
overlap_seq_inorder_inlen=[]

currmas = []
currmas_inlen=[]
endmas = []
endmas_inlen=[]

currnos = []
currnos_inlen=[]
leftover = []
leftover_inlen=[]

RT = []
RT_inlen = []


length_seq = []

for sub in range(100):
    dat_sbj  = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
    overlap_seq_inorder_for_puzzle = []
    overlap_seq_inorder_inlen_for_puzzle=[]
    currmas_for_puzzle = []
    currmas_inlen_for_puzzle=[]
    endmas_for_puzzle = []
    endmas_inlen_for_puzzle=[]
    currnos_for_puzzle = []
    currnos_inlen_for_puzzle=[]
    leftover_for_puzzle = []
    leftover_inlen_for_puzzle=[]
    RT_for_puzzle = []
    RT_inlen_for_puzzle = []

    for pzi in np.unique(sc_data_choice_level['puzzleID']):
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()        

        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"]==1].index
        path_bf_undo = dat_sbj_pzi["currMas"][firstUndo_idx-1] # the mas of the state before undo

        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index
        path_af_undo = dat_sbj_pzi["currMas"][lastUndo_idx+1] # the mas of the state after undo
        
        submit_idx =  pd.Int64Index([len(dat_sbj_pzi)-1])


        if len(firstUndo_idx)>0:
            if len(firstUndo_idx)==1: #
                seq_bf = dat_sbj_pzi["path"][firstUndo_idx-1]
                seq_af = dat_sbj_pzi["path"][submit_idx]
                seq_im = dat_sbj_pzi["path"][lastUndo_idx]
                currmas_bf = dat_sbj_pzi["currMas"][lastUndo_idx-1]
                currmas_af = dat_sbj_pzi["currMas"][lastUndo_idx+1]
                endmas_bf = dat_sbj_pzi["currMas"][firstUndo_idx-1]
                endmas_af = dat_sbj_pzi["currMas"][submit_idx]
                
                
                currnos_bf = dat_sbj_pzi["currNos"][lastUndo_idx-1]
                currnos_af = dat_sbj_pzi["currNos"][lastUndo_idx+1]
                
                leftover_bf = dat_sbj_pzi["leftover"][firstUndo_idx-1]
                leftover_af = dat_sbj_pzi["leftover"][submit_idx]
                
                RT_bf = dat_sbj_pzi["RT"][lastUndo_idx-1]
                RT_af = dat_sbj_pzi["RT"][lastUndo_idx+1]

            else:
                seq_bf = dat_sbj_pzi["path"][firstUndo_idx-1]
                t_idx = (firstUndo_idx[1:]-1).to_list()
                t_idx.append(submit_idx.item())
                seq_af = dat_sbj_pzi["path"][t_idx]
                seq_im = dat_sbj_pzi["path"][lastUndo_idx]
                currmas_bf = dat_sbj_pzi["currMas"][lastUndo_idx-1]
                currmas_af = dat_sbj_pzi["currMas"][lastUndo_idx+1]
                endmas_bf = dat_sbj_pzi["currMas"][firstUndo_idx-1]
                endmas_af = dat_sbj_pzi["currMas"][t_idx]
                
                
                currnos_bf = dat_sbj_pzi["currNos"][lastUndo_idx-1]
                currnos_af = dat_sbj_pzi["currNos"][lastUndo_idx+1]
                
                leftover_bf = dat_sbj_pzi["leftover"][firstUndo_idx-1]
                leftover_af = dat_sbj_pzi["leftover"][t_idx]
                
                RT_bf = dat_sbj_pzi["RT"][lastUndo_idx-1]
                RT_af = dat_sbj_pzi["RT"][lastUndo_idx+1]

            seq_im = seq_im.reset_index()
            pathStr_im = [seq_im.loc[i].path.strip('[').strip(']') for i in range(len(seq_im))]
            pathList_im = [[int(i) for i in pathStr_im[j].split(', ')]  for j in range(len(pathStr_im))]
            pathStr_im = [" ".join([str(a) for a in pathList_im[j]]) for j in range(len(seq_im))]

            seq_bf = seq_bf.reset_index()
            pathStr_bf = [seq_bf.loc[i].path.strip('[').strip(']') for i in range(len(seq_bf))]
            pathList_bf = [[int(i) for i in pathStr_bf[j].split(', ')]  for j in range(len(pathStr_bf))]
            pathStr_bf = [" ".join([str(a) for a in pathList_bf[j]]) for j in range(len(seq_bf))]

            seq_af = seq_af.reset_index()
            pathStr_af = [seq_af.loc[i].path.strip('[').strip(']') for i in range(len(seq_af))]
            pathList_af = [[int(i) for i in pathStr_af[j].split(', ')]  for j in range(len(pathStr_af))]
            pathStr_af = [" ".join([str(a) for a in pathList_af[j]]) for j in range(len(seq_af))]

            pathList_bfim = [np.setdiff1d(np.array(pathList_bf[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_bf))]
            pathStr_bfim = [" ".join([str(a) for a in pathList_bfim[j]]) for j in range(len(pathList_im))]

            pathList_afim = [np.setdiff1d(np.array(pathList_af[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_af))]
            pathStr_afim = [" ".join([str(a) for a in pathList_afim[j]]) for j in range(len(pathList_im))]


            currmas_dif = np.array(currmas_af) - np.array(currmas_bf)
            endmas_dif = np.array(endmas_af) - np.array(endmas_bf)
            
            currnos_dif = np.array(currnos_af) - np.array(currnos_bf)
            leftover_dif = np.array(leftover_af) - np.array(leftover_bf)
            
            rt_dif = np.array(RT_af) - np.array(RT_bf)
            
            
            overlap_seq_2 = []
            seq_inorder_2 = []
            currmas_dif_2 = []
            endmas_dif_2 = []
            currnos_dif_2 = []
            leftover_dif_2 = []
            RT_dif_2 = []
            for i in range(len(seq_af)):
                temp = []
                for j in range(i+1):
            #         temp.append(get_overlaplist(pathList_afim[i], pathList_bfim[j]))
                    temptemp = get_overlaplist(pathList_afim[i], pathList_bfim[j])
                    if not len(temptemp)==0:
                        temp.extend(temptemp)

                temp = trimmer(temp)
                if len(temp)==0:
                    temp.append([])

                seq_inorder_2.append(temp)

            #     len_seq_inorder_2 = [len(s) for s in temp if len(s)!=0]
                len_seq_inorder_2 =[]
                tempcurd = []
                tempendd = []
                tempnosd = []
                templeft = []
                temprt = []
                for s in temp:
#                     print('*')
                    if len(s)==0:
                        len_seq_inorder_2.append(0)
                    else:
                        len_seq_inorder_2.append(len(s))
                    tempcurd.append(currmas_dif[i])
                    tempendd.append(endmas_dif[i])
                    tempnosd.append(currnos_dif[i])
                    templeft.append(leftover_dif[i])
                    temprt.append(rt_dif[i])
                    
                overlap_seq_2.extend(len_seq_inorder_2)
                currmas_dif_2.extend(tempcurd)
                endmas_dif_2.extend(tempendd)
                currnos_dif_2.extend(tempnosd)
                leftover_dif_2.extend(templeft)
                RT_dif_2.extend(temprt)
                
#                 print(overlap_seq_2)
#                 print(currmas_dif_2)


            overlap_seq_inorder_for_puzzle.append([np.sum(np.array(overlap_seq_2)==0), np.sum(np.array(overlap_seq_2)!=0)])
            overlap_seq_inorder_inlen_for_puzzle.append([np.sum(np.array(overlap_seq_2)==0), np.sum(np.array(overlap_seq_2)==1), 
                                                         np.sum(np.array(overlap_seq_2)==2), np.sum(np.array(overlap_seq_2)>2),])


            currmas_for_puzzle.append([np.sum(np.array(currmas_dif_2)[np.array(overlap_seq_2)==0]) , np.sum(np.array(currmas_dif_2)[np.array(overlap_seq_2)!=0]) ] )
            currmas_inlen_for_puzzle.append([np.sum(np.array(currmas_dif_2)[np.array(overlap_seq_2)==0]), np.sum(np.array(currmas_dif_2)[np.array(overlap_seq_2)==1]), 
                                                         np.sum(np.array(currmas_dif_2)[np.array(overlap_seq_2)==2]), np.sum(np.array(currmas_dif_2)[np.array(overlap_seq_2)>2]),])
            endmas_for_puzzle.append([np.sum(np.array(endmas_dif_2)[np.array(overlap_seq_2)==0]) , np.sum(np.array(endmas_dif_2)[np.array(overlap_seq_2)!=0]) ] )
            endmas_inlen_for_puzzle.append([np.sum(np.array(endmas_dif_2)[np.array(overlap_seq_2)==0]), np.sum(np.array(endmas_dif_2)[np.array(overlap_seq_2)==1]), 
                                                         np.sum(np.array(endmas_dif_2)[np.array(overlap_seq_2)==2]), np.sum(np.array(endmas_dif_2)[np.array(overlap_seq_2)>2]),])
#             print(overlap_seq_2)
            length_seq.extend(overlap_seq_2)
    

            currnos_for_puzzle.append([np.sum(np.array(currnos_dif_2)[np.array(overlap_seq_2)==0]) , np.sum(np.array(currnos_dif_2)[np.array(overlap_seq_2)!=0]) ] )
            currnos_inlen_for_puzzle.append([np.sum(np.array(currnos_dif_2)[np.array(overlap_seq_2)==0]), np.sum(np.array(currnos_dif_2)[np.array(overlap_seq_2)==1]), 
                                                         np.sum(np.array(currnos_dif_2)[np.array(overlap_seq_2)==2]), np.sum(np.array(currnos_dif_2)[np.array(overlap_seq_2)>2]),])
            
            leftover_for_puzzle.append([np.sum(np.array(leftover_dif_2)[np.array(overlap_seq_2)==0]) , np.sum(np.array(leftover_dif_2)[np.array(overlap_seq_2)!=0]) ] )
            leftover_inlen_for_puzzle.append([np.sum(np.array(leftover_dif_2)[np.array(overlap_seq_2)==0]), np.sum(np.array(leftover_dif_2)[np.array(overlap_seq_2)==1]), 
                                                         np.sum(np.array(leftover_dif_2)[np.array(overlap_seq_2)==2]), np.sum(np.array(leftover_dif_2)[np.array(overlap_seq_2)>2]),])


            RT_for_puzzle.append([np.sum(np.array(RT_dif_2)[np.array(overlap_seq_2)==0]) , np.sum(np.array(RT_dif_2)[np.array(overlap_seq_2)!=0]) ] )
            RT_inlen_for_puzzle.append([np.sum(np.array(RT_dif_2)[np.array(overlap_seq_2)==0]), np.sum(np.array(RT_dif_2)[np.array(overlap_seq_2)==1]), 
                                                         np.sum(np.array(RT_dif_2)[np.array(overlap_seq_2)==2]), np.sum(np.array(RT_dif_2)[np.array(overlap_seq_2)>2]),])
            
            
        else:
            overlap_seq_inorder_for_puzzle.append([np.nan,np.nan])
            overlap_seq_inorder_inlen_for_puzzle.append([np.nan,np.nan,np.nan,np.nan])
            currmas_for_puzzle.append([np.nan,np.nan])
            currmas_inlen_for_puzzle.append([np.nan,np.nan,np.nan,np.nan])
            endmas_for_puzzle.append([np.nan,np.nan])
            endmas_inlen_for_puzzle.append([np.nan,np.nan,np.nan,np.nan])
            currnos_for_puzzle.append([np.nan,np.nan])
            currnos_inlen_for_puzzle.append([np.nan,np.nan,np.nan,np.nan])
            leftover_for_puzzle.append([np.nan,np.nan])
            leftover_inlen_for_puzzle.append([np.nan,np.nan,np.nan,np.nan])
            RT_for_puzzle.append([np.nan,np.nan])
            RT_inlen_for_puzzle.append([np.nan,np.nan,np.nan,np.nan])
            
    overlap_seq_inorder_for_puzzle = np.array(overlap_seq_inorder_for_puzzle)
    temp = overlap_seq_inorder_for_puzzle.copy()
    overlap_seq_inorder.append(np.nansum(temp,axis=0))

    overlap_seq_inorder_inlen_for_puzzle=np.array(overlap_seq_inorder_inlen_for_puzzle)
    temp = overlap_seq_inorder_inlen_for_puzzle.copy()
    overlap_seq_inorder_inlen.append(np.nansum(temp,axis=0))
    
            
    currmas_for_puzzle = np.array(currmas_for_puzzle)
    temp = currmas_for_puzzle.copy()
    currmas.append(np.nansum(temp,axis=0))

    currmas_inlen_for_puzzle=np.array(currmas_inlen_for_puzzle)
    temp = currmas_inlen_for_puzzle.copy()
    currmas_inlen.append(np.nansum(temp,axis=0))
    
            
    endmas_for_puzzle = np.array(endmas_for_puzzle)
    temp = endmas_for_puzzle.copy()
    endmas.append(np.nansum(temp,axis=0))

    endmas_inlen_for_puzzle=np.array(endmas_inlen_for_puzzle)
    temp = endmas_inlen_for_puzzle.copy()
    endmas_inlen.append(np.nansum(temp,axis=0))
    
    
    currnos_for_puzzle = np.array(currnos_for_puzzle)
    temp = currnos_for_puzzle.copy()
    currnos.append(np.nansum(temp,axis=0))

    currnos_inlen_for_puzzle=np.array(currnos_inlen_for_puzzle)
    temp = currnos_inlen_for_puzzle.copy()
    currnos_inlen.append(np.nansum(temp,axis=0))
            
    leftover_for_puzzle = np.array(leftover_for_puzzle)
    temp = leftover_for_puzzle.copy()
    leftover.append(np.nansum(temp,axis=0))

    leftover_inlen_for_puzzle=np.array(leftover_inlen_for_puzzle)
    temp = leftover_inlen_for_puzzle.copy()
    leftover_inlen.append(np.nansum(temp,axis=0))
    
    
    RT_for_puzzle = np.array(RT_for_puzzle)
    temp = RT_for_puzzle.copy()
    RT.append(np.nansum(temp,axis=0))

    RT_inlen_for_puzzle=np.array(RT_inlen_for_puzzle)
    temp = RT_inlen_for_puzzle.copy()
    RT_inlen.append(np.nansum(temp,axis=0))
    
    
overlap_seq_inorder = np.array(overlap_seq_inorder)
overlap_seq_inorder_inlen = np.array(overlap_seq_inorder_inlen)
currmas = np.array(currmas)
currmas_inlen = np.array(currmas_inlen)
endmas = np.array(endmas)
endmas_inlen = np.array(endmas_inlen)
leftover = np.array(leftover)
leftover_inlen = np.array(leftover_inlen)
currnos = np.array(currnos)
currnos_inlen = np.array(currnos_inlen)
RT = np.array(RT)
RT_inlen = np.array(RT_inlen)
In [62]:
overlap_seq_inorder_inlen_minlen2 = overlap_seq_inorder_inlen.copy()
overlap_seq_inorder_inlen_minlen2 = overlap_seq_inorder_inlen_minlen2[:,np.setdiff1d(range(overlap_seq_inorder_inlen_minlen2.shape[1]),1)]
overlap_seq_inorder_minlen2 = overlap_seq_inorder_inlen_minlen2.copy()
overlap_seq_inorder_minlen2 = np.array([overlap_seq_inorder_minlen2[:,0], np.sum(overlap_seq_inorder_minlen2[:,1:],axis=1) ]).transpose()

overlap_seq_inorder_p = overlap_seq_inorder/np.sum(overlap_seq_inorder,axis=1)[:,None]
overlap_seq_inorder_inlen_p = overlap_seq_inorder_inlen/np.sum(overlap_seq_inorder_inlen,axis=1)[:,None]


overlap_seq_inorder_minlen2_p = overlap_seq_inorder_minlen2/np.sum(overlap_seq_inorder_minlen2,axis=1)[:,None]
overlap_seq_inorder_inlen_minlen2_p = overlap_seq_inorder_inlen_minlen2/np.sum(overlap_seq_inorder_inlen_minlen2,axis=1)[:,None]
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:6: RuntimeWarning: invalid value encountered in true_divide
  
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide
  import sys
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in true_divide
  # Remove the CWD from sys.path while we load stuff.
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:11: RuntimeWarning: invalid value encountered in true_divide
  # This is added back by InteractiveShellApp.init_path()
In [63]:
currmas_p = np.divide(currmas, np.sum(overlap_seq_inorder,axis=1).reshape(-1,1))
currmas_inlen_p = np.divide(currmas_inlen, np.sum(overlap_seq_inorder_inlen,axis=1).reshape(-1,1))

endmas_p = np.divide(endmas, np.sum(overlap_seq_inorder,axis=1).reshape(-1,1))
endmas_inlen_p = np.divide(endmas_inlen, np.sum(overlap_seq_inorder_inlen,axis=1).reshape(-1,1))

currnos_p = np.divide(currnos, np.sum(overlap_seq_inorder,axis=1).reshape(-1,1))
currnos_inlen_p = np.divide(currnos_inlen, np.sum(overlap_seq_inorder_inlen,axis=1).reshape(-1,1))

leftover_p = np.divide(leftover, np.sum(overlap_seq_inorder,axis=1).reshape(-1,1))
leftover_inlen_p = np.divide(leftover_inlen, np.sum(overlap_seq_inorder_inlen,axis=1).reshape(-1,1))

RT_p = np.divide(RT, np.sum(overlap_seq_inorder,axis=1).reshape(-1,1))
RT_inlen_p = np.divide(RT_inlen, np.sum(overlap_seq_inorder_inlen,axis=1).reshape(-1,1))
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:1: RuntimeWarning: invalid value encountered in true_divide
  """Entry point for launching an IPython kernel.
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:2: RuntimeWarning: invalid value encountered in true_divide
  
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:4: RuntimeWarning: invalid value encountered in true_divide
  after removing the cwd from sys.path.
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:5: RuntimeWarning: invalid value encountered in true_divide
  """
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide
  import sys
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:8: RuntimeWarning: invalid value encountered in true_divide
  
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in true_divide
  # Remove the CWD from sys.path while we load stuff.
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:11: RuntimeWarning: invalid value encountered in true_divide
  # This is added back by InteractiveShellApp.init_path()
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:13: RuntimeWarning: invalid value encountered in true_divide
  del sys.path[0]
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:14: RuntimeWarning: invalid value encountered in true_divide
  
In [64]:
currmas_inlen_minlen2 = currmas_inlen.copy()
currmas_inlen_minlen2 = currmas_inlen_minlen2[:,np.setdiff1d(range(currmas_inlen_minlen2.shape[1]),1)]
endmas_inlen_minlen2 = endmas_inlen.copy()
endmas_inlen_minlen2 = endmas_inlen_minlen2[:,np.setdiff1d(range(endmas_inlen_minlen2.shape[1]),1)]

currmas_minlen2 = currmas_inlen_minlen2.copy()
currmas_minlen2 = np.array([currmas_minlen2[:,0], np.sum(currmas_minlen2[:,1:],axis=1) ]).transpose()

endmas_minlen2 = endmas_inlen_minlen2.copy()
endmas_minlen2 = np.array([endmas_minlen2[:,0], np.sum(endmas_minlen2[:,1:],axis=1) ]).transpose()

currmas_minlen2_p = np.divide(currmas_minlen2, np.sum(overlap_seq_inorder_minlen2,axis=1).reshape(-1,1))
currmas_inlen_minlen2_p = np.divide(currmas_inlen_minlen2, np.sum(overlap_seq_inorder_inlen_minlen2,axis=1).reshape(-1,1))
endmas_minlen2_p = np.divide(endmas_minlen2, np.sum(overlap_seq_inorder_minlen2,axis=1).reshape(-1,1))
endmas_inlen_minlen2_p = np.divide(endmas_inlen_minlen2, np.sum(overlap_seq_inorder_inlen_minlen2,axis=1).reshape(-1,1))
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:12: RuntimeWarning: invalid value encountered in true_divide
  if sys.path[0] == "":
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:13: RuntimeWarning: invalid value encountered in true_divide
  del sys.path[0]
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:14: RuntimeWarning: invalid value encountered in true_divide
  
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:15: RuntimeWarning: invalid value encountered in true_divide
  from ipykernel import kernelapp as app
In [65]:
print(np.nanmean(overlap_seq_inorder_p,axis=0))
print(np.nanmean(overlap_seq_inorder_inlen_p,axis=0))
print(np.nanmean(overlap_seq_inorder_minlen2_p,axis=0))

print(np.nanmean(currmas_p,axis=0))
print(np.nanmean(currmas_inlen_p,axis=0))
print(np.nanmean(endmas_p,axis=0))
print(np.nanmean(endmas_inlen_p,axis=0))

print(np.nanmean(currmas_minlen2_p,axis=0))
print(np.nanmean(currmas_inlen_minlen2_p,axis=0))

print(np.nanmean(endmas_minlen2_p,axis=0))
print(np.nanmean(endmas_inlen_minlen2_p,axis=0))
[0.22208629 0.77791371]
[0.22208629 0.45150286 0.15166049 0.17475036]
[0.36477837 0.63522163]
[0.06760932 0.28375106]
[0.06760932 0.17354585 0.04429349 0.06591173]
[0.08302076 0.38888562]
[0.08302076 0.21388566 0.07050334 0.10449662]
[0.07980241 0.21998494]
[0.07980241 0.08993949 0.13004545]
[0.10674695 0.34801712]
[0.10674695 0.13866777 0.20934934]

Histogram - length of overlapped sequences¶

In [66]:
%matplotlib notebook
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(6,4.5))
plt.hist(length_seq)
plt.title('Histogram of the length of overlapped seqs')
Out[66]:
Text(0.5, 1.0, 'Histogram of the length of overlapped seqs')

How often overlaps with previous paths¶

(including length = 1; which means only a city overlapped)¶

In [67]:
stat12, p12 = wilcoxon(overlap_seq_inorder_p[:,0],overlap_seq_inorder_p[:,1])
print(stat12)
print(p12)
588.0
4.408507196523171e-11
In [68]:
%matplotlib notebook
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
bb = plt.bar(range(2), np.nanmean(overlap_seq_inorder_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(overlap_seq_inorder_p,axis = 0)/np.sqrt(overlap_seq_inorder_p.shape[0]))
plt.xticks([0,1], ['totally different','has some overlaps'])
plt.xlabel('comparison path before and after undoing')



#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)


fig.savefig(out_dir + 'proportion_undo_different_overlaps.png', dpi=600, bbox_inches='tight')
In [69]:
stat34, p34 = wilcoxon(overlap_seq_inorder_inlen_p[:,2],overlap_seq_inorder_inlen_p[:,3])
print(stat34)
print(p34)
719.5
7.168079534710966e-05
In [70]:
%matplotlib notebook

mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(8,6))
bb = plt.bar(range(4), np.nanmean(overlap_seq_inorder_inlen_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(overlap_seq_inorder_inlen_p,axis = 0)/np.sqrt(overlap_seq_inorder_inlen_p.shape[0]))
plt.xticks(range(4), ['totally \ndifferent','1','2\nlength of the overlapped sequence','3+'])
plt.xlabel('comparison path before and after undoing')



#statistics
x1, x2 = 2,3
y, h, col = np.max([bb[2].get_height(),bb[3].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)\
Out[70]:
Text(2.5, 0.3247503579368857, '$p = 0.000000$')

(length > 1; sequences)¶

In [71]:
stat12, p12 = wilcoxon(overlap_seq_inorder_minlen2_p[:,0],overlap_seq_inorder_minlen2_p[:,1])
print(stat12)
print(p12)
864.5
2.46882479492761e-07
In [72]:
%matplotlib notebook

mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig= plt.figure(figsize=(6,4.5))
bb = plt.bar(range(2), np.nanmean(overlap_seq_inorder_minlen2_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(overlap_seq_inorder_minlen2_p,axis = 0)/np.sqrt(overlap_seq_inorder_minlen2_p.shape[0]))
plt.xticks([0,1], ['totally different','has some overlaps']) # length >= 2
plt.xlabel('comparison path before and after undoing')



#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)


fig.savefig(out_dir + 'proportion_undo_different_overlaps_len2.png', dpi=600, bbox_inches='tight')
In [73]:
stat12, p12 = wilcoxon(overlap_seq_inorder_inlen_minlen2_p[:,1],overlap_seq_inorder_inlen_minlen2_p[:,2])
print(stat12)
print(p12)
701.5
1.3501305434343351e-06
In [74]:
%matplotlib notebook

mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(8,6))
bb = plt.bar(range(3), np.nanmean(overlap_seq_inorder_inlen_minlen2_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(overlap_seq_inorder_inlen_minlen2_p,axis = 0)/np.sqrt(overlap_seq_inorder_inlen_minlen2_p.shape[0]))
plt.xticks([0,1,1.5,2], ['totally \ndifferent','2','\nlength of the overlapped sequence','3+'])

plt.xlabel('comparison path before and after undoing')



#statistics
x1, x2 = 1,2
y, h, col = np.max([bb[1].get_height(),bb[2].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)

fig.savefig(out_dir + 'proportion_undo_different_overlaps_bylen_len2.png', dpi=600, bbox_inches='tight')

MAS (maximum achivable score) difference between two child nodes (bf and af undo; undo target)¶

currmas_fig

(figure to help understanding) MAS diff between af and bf

(including length = 1; which means only a city overlapped)¶

In [75]:
## check variance
print(np.nanstd(currmas_p[:,0], ddof=1))
print(np.nanstd(currmas_p[:,1], ddof=1))
## check normality
sm.qqplot(currmas_p[~np.isnan(currmas_p[:,0]),0], line='s')
py.show()
sm.qqplot(currmas_p[~np.isnan(currmas_p[:,1]),1], line='s')
py.show()
0.3554695787846406
0.3401033696614426
In [76]:
# find the index that currmas_p is not nan
idx = np.where(~np.isnan(currmas_p[:,0]))[0]
stat12, p12 = ttest_ind(currmas_p[idx,0],currmas_p[idx,1])
print(stat12)
print(p12)
-4.167986165569915
4.789498712713116e-05
In [77]:
stat12, p12 = wilcoxon(currmas_p[:,0],currmas_p[:,1])
print(stat12)
print(p12)
613.0
3.66918192925141e-10
In [78]:
%matplotlib notebook

mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
bb = plt.bar(range(2), np.nanmean(currmas_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(currmas_p,axis = 0)/np.sqrt(currmas_p.shape[0]))
plt.xticks([0,1], ['totally different','has some overlaps'])
# plt.xlabel('mas difference between child cities of branching city af and bf undo')
plt.ylabel('Maximum achivable score difference around branching city')



#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)

fig.savefig(out_dir + 'proportion_mas_different_overlaps.png', dpi=600, bbox_inches='tight')
In [79]:
## check variance
print(np.nanstd(currmas_inlen_p[:,2], ddof=1))
print(np.nanstd(currmas_inlen_p[:,3], ddof=1))
## check normality
sm.qqplot(currmas_inlen_p[~np.isnan(currmas_inlen_p[:,2]),0], line='s')
py.show()
sm.qqplot(currmas_inlen_p[~np.isnan(currmas_inlen_p[:,3]),1], line='s')
py.show()
0.08867834829644963
0.10264900388789816
In [80]:
stat23, p23 = wilcoxon(currmas_inlen_p[:,2],currmas_inlen_p[:,3])
print(stat23)
print(p23)
716.0
0.00010992678664051827
In [81]:
%matplotlib notebook

mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(8,6))
bb = plt.bar(range(4), np.nanmean(currmas_inlen_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(currmas_inlen_p,axis = 0)/np.sqrt(currmas_inlen_p.shape[0]))
plt.xticks(range(4), ['totally \ndifferent','1','2\nlength of the overlapped sequence','3+'])
plt.xlabel('mas difference between child cities of branching city af and bf undo')



#statistics
x1, x2 = 2,3
y, h, col = np.max([bb[2].get_height(),bb[3].get_height()]) + 0.05, 0.005, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p23), ha='center', va='bottom', color=col, fontsize = 8)
Out[81]:
Text(2.5, 0.12091173000959742, '$p = 0.000110$')

means that new path is better with some overlaps

(length > 1; sequences)¶

In [82]:
## check variance
print(np.nanstd(currmas_minlen2_p[:,0], ddof=1))
print(np.nanstd(currmas_minlen2_p[:,1], ddof=1))
## check normality
sm.qqplot(currmas_minlen2_p[~np.isnan(currmas_minlen2_p[:,0]),0], line='s')
py.show()
sm.qqplot(currmas_minlen2_p[~np.isnan(currmas_minlen2_p[:,1]),1], line='s')
py.show()
0.40523739028006
0.26013279783163734
In [83]:
stat12, p12 = wilcoxon(currmas_minlen2_p[:,0],currmas_minlen2_p[:,1])
print(stat12)
print(p12)
702.0
7.857020484842883e-09
In [84]:
%matplotlib notebook

mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
bb = plt.bar(range(2), np.nanmean(currmas_minlen2_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(currmas_minlen2_p,axis = 0)/np.sqrt(currmas_minlen2_p.shape[0]))
plt.xticks([0,1], ['totally different','has some overlaps']) # (include len>=2)
plt.xlabel('mas difference between child cities of branching city af and bf undo')




#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
fig.savefig(out_dir + 'proportion_mas_different_overlaps_len2.png', dpi=600, bbox_inches='tight')
In [85]:
stat12, p12 = ttest_ind(currmas_inlen_minlen2_p[~np.isnan(currmas_inlen_minlen2_p[:,0]),0],currmas_inlen_minlen2_p[~np.isnan(currmas_inlen_minlen2_p[:,0]),1])
print(stat12)
print(p12)
-0.21086634567790852
0.8332533464873775
In [86]:
stat12, p12 = wilcoxon(currmas_inlen_minlen2_p[:,0],currmas_inlen_minlen2_p[:,1])
stat23, p23 = wilcoxon(currmas_inlen_minlen2_p[:,1],currmas_inlen_minlen2_p[:,2])
print(stat12)
print(p12)
print(stat23)
print(p23)
997.5
1.4510418096217376e-05
730.0
4.24013043843014e-06
In [87]:
%matplotlib notebook

mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(8,6))
bb = plt.bar(range(3), np.nanmean(currmas_inlen_minlen2_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(currmas_inlen_minlen2_p,axis = 0)/np.sqrt(currmas_inlen_minlen2_p.shape[0]))
# plt.xticks(range(3), ['totally \ndifferent','len=2','len>=3'])
plt.xticks([0,1,1.5,2], ['totally \ndifferent','2','\nlength of the overlapped sequence','3+'])
plt.xlabel('mas difference between child cities of branching city af and bf undo')



#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.05, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)

#statistics
x1, x2 = 1,2
y, h, col = np.max([bb[2].get_height(),bb[1].get_height()]) + 0.05, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p23), ha='center', va='bottom', color=col, fontsize = 8)

fig.savefig(out_dir + 'proportion_mas_different_overlaps_bylen_len2.png', dpi=600, bbox_inches='tight')

means that new path is better with some overlaps

MAS (maximum achivable score) difference between two end nodes (bf and af undo; new destination)¶

currmas_fig

(figure to help understanding) MAS diff between af and bf

(including length = 1; which means only a city overlapped)¶

In [88]:
## check variance
print(np.nanstd(endmas_p[:,0], ddof=1))
print(np.nanstd(endmas_p[:,1], ddof=1))
## check normality
sm.qqplot(endmas_p[~np.isnan(endmas_p[:,0]),0], line='s')
py.show()
sm.qqplot(endmas_p[~np.isnan(endmas_p[:,1]),1], line='s')
py.show()
0.46010317708591597
0.3441129564856905
In [89]:
stat12, p12 = wilcoxon(endmas_p[:,0],endmas_p[:,1])
print(stat12)
print(p12)
444.5
5.858228574182619e-12
In [90]:
%matplotlib notebook

mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(6,4.5))
bb = plt.bar(range(2), np.nanmean(endmas_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(endmas_p,axis = 0)/np.sqrt(endmas_p.shape[0]))
plt.xticks([0,1], ['totally different','has some overlaps'])
plt.xlabel('mas difference between cities at the end af and bf undo')



#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
Out[90]:
Text(0.5, 0.5388856241387392, '$p = 0.000000$')
In [91]:
stat12, p12 = wilcoxon(endmas_inlen_p[:,2],endmas_inlen_p[:,3])
print(stat12)
print(p12)
539.0
1.7191433666615128e-06
In [92]:
%matplotlib notebook

plt.figure()
bb = plt.bar(range(4), np.nanmean(endmas_inlen_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(endmas_inlen_p,axis = 0)/np.sqrt(endmas_inlen_p.shape[0]))
plt.xticks(range(4), ['totally \ndifferent','len=1','len=2','len>=3'])
plt.xlabel('mas difference between cities at the end af and bf undo')




#statistics
x1, x2 = 2,3
y, h, col = np.max([bb[2].get_height(),bb[3].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
Out[92]:
Text(2.5, 0.2544966193247388, '$p = 0.000002$')

(length > 1; sequences)¶

In [93]:
stat12, p12 = wilcoxon(endmas_minlen2_p[:,0],endmas_minlen2_p[:,1])
print(stat12)
print(p12)
531.5
2.334585044941607e-10
In [94]:
%matplotlib notebook

mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(6,4.5))
bb = plt.bar(range(2), np.nanmean(endmas_minlen2_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(endmas_minlen2_p,axis = 0)/np.sqrt(endmas_minlen2_p.shape[0]))
plt.xticks([0,1], ['totally different','has some overlaps']) #(includer len>=2)
plt.xlabel('mas difference between cities at the end af and bf undo')




#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
Out[94]:
Text(0.5, 0.498017117274624, '$p = 0.000000$')
In [95]:
stat12, p12 = wilcoxon(endmas_inlen_minlen2_p[:,0],endmas_inlen_minlen2_p[:,1])
stat23, p23 = wilcoxon(endmas_inlen_minlen2_p[:,1],endmas_inlen_minlen2_p[:,2])
print(stat12)
print(p12)
print(stat23)
print(p23)
976.5
5.988959268758323e-06
536.5
4.308377997714307e-08
In [96]:
%matplotlib notebook

plt.figure()
bb = plt.bar(range(3), np.nanmean(endmas_inlen_minlen2_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(endmas_inlen_minlen2_p,axis = 0)/np.sqrt(endmas_inlen_minlen2_p.shape[0]))
plt.xticks(range(3), ['totally \ndifferent','len=2','len>=3'])
plt.xlabel('mas difference between cities at the end af and bf undo')




#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.05, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)

#statistics
x1, x2 = 1,2
y, h, col = np.max([bb[2].get_height(),bb[1].get_height()]) + 0.05, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p23), ha='center', va='bottom', color=col, fontsize = 8)
Out[96]:
Text(1.5, 0.27934934369358494, '$p = 0.000000$')

NOS¶

In [97]:
currnos_inlen_minlen2 = currnos_inlen.copy()
currnos_inlen_minlen2 = currnos_inlen_minlen2[:,np.setdiff1d(range(currnos_inlen_minlen2.shape[1]),1)]

currnos_minlen2 = currnos_inlen_minlen2.copy()
currnos_minlen2 = np.array([currnos_minlen2[:,0], np.sum(currnos_minlen2[:,1:],axis=1) ]).transpose()

currnos_minlen2_p = np.divide(currnos_minlen2, np.sum(overlap_seq_inorder_minlen2,axis=1).reshape(-1,1))
currnos_inlen_minlen2_p = np.divide(currnos_inlen_minlen2, np.sum(overlap_seq_inorder_inlen_minlen2,axis=1).reshape(-1,1))
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide
  import sys
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:8: RuntimeWarning: invalid value encountered in true_divide
  
In [98]:
## check variance
print(np.nanstd(currnos_minlen2_p[:,0], ddof=1))
print(np.nanstd(currnos_minlen2_p[:,1], ddof=1))
## check normality
sm.qqplot(currnos_minlen2_p[~np.isnan(currnos_minlen2_p[:,0]),0], line='s')
py.show()
sm.qqplot(currnos_minlen2_p[~np.isnan(currnos_minlen2_p[:,1]),1], line='s')
py.show()
1.385248629690381
7.83873735380135
In [99]:
stat12, p12 = ttest_ind(currnos_minlen2_p[~np.isnan(currnos_minlen2_p[:,0]),0],currnos_minlen2_p[~np.isnan(currnos_minlen2_p[:,0]),1])
print(stat12)
print(p12)
2.6869335886670407
0.007954071465558715
In [100]:
stat12, p12 = wilcoxon(currnos_minlen2_p[:,0],currnos_minlen2_p[:,1])
print(stat12)
print(p12)
974.0
4.498452277893566e-07
In [101]:
%matplotlib notebook

mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
bb = plt.bar(range(2), np.nanmean(currnos_minlen2_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(currnos_minlen2_p,axis = 0)/np.sqrt(currnos_minlen2_p.shape[0]))
plt.xticks([0,1], ['totally different','has some overlaps'])
plt.xlabel('nos difference between cities at the end af and bf undo')




#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
fig.savefig(out_dir + 'proportion_nos_different_overlaps_len2.png', dpi=600, bbox_inches='tight')
In [102]:
stat12, p12 = wilcoxon(currnos_inlen_minlen2_p[:,1],currnos_inlen_minlen2_p[:,2])
print(stat12)
print(p12)
1112.5
0.0004347880090769754
In [103]:
%matplotlib notebook

fig = plt.figure()
bb = plt.bar(range(3), np.nanmean(currnos_inlen_minlen2_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(currnos_inlen_minlen2_p,axis = 0)/np.sqrt(currnos_inlen_minlen2_p.shape[0]))
plt.xticks(range(3), ['totally \ndifferent','len=2','len>=3'])
plt.xlabel('nos difference between cities at the end af and bf undo')




# #statistics
# x1, x2 = 0,1
# y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.05, 0.02, 'k'
# plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
# plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)

#statistics
x1, x2 = 1,2
y, h, col =  0.05, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)

fig.savefig(out_dir + 'proportion_nos_different_overlaps_bylen_len2.png', dpi=600, bbox_inches='tight')

leftover¶

In [104]:
leftover_inlen_minlen2 = leftover_inlen.copy()
leftover_inlen_minlen2 = leftover_inlen_minlen2[:,np.setdiff1d(range(leftover_inlen_minlen2.shape[1]),1)]

leftover_minlen2 = leftover_inlen_minlen2.copy()
leftover_minlen2 = np.array([leftover_minlen2[:,0], np.sum(leftover_minlen2[:,1:],axis=1) ]).transpose()

leftover_minlen2_p = np.divide(leftover_minlen2, np.sum(overlap_seq_inorder_minlen2,axis=1).reshape(-1,1))
leftover_inlen_minlen2_p = np.divide(leftover_inlen_minlen2, np.sum(overlap_seq_inorder_inlen_minlen2,axis=1).reshape(-1,1))
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide
  import sys
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:8: RuntimeWarning: invalid value encountered in true_divide
  
In [105]:
print(np.nanmean(leftover_minlen2_p,axis=0))
print(np.nanmean(leftover_inlen_minlen2_p,axis=0))
[-11.01847968 -11.14767781]
[-11.01847968  -5.00073805  -6.14693976]
In [106]:
stat12, p12 = wilcoxon(leftover_minlen2_p[:,0],leftover_minlen2_p[:,1])
print(stat12)
print(p12)
1316.0
3.225460839514926e-05
In [107]:
%matplotlib notebook

mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(6,4.5))
bb = plt.bar(range(2), np.nanmean(leftover_minlen2_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(leftover_minlen2_p,axis = 0)/np.sqrt(leftover_minlen2_p.shape[0]))
plt.xticks([0,1], ['totally different','has some overlaps'])
plt.xlabel('leftover difference between cities at the end af and bf undo')



#statistics
x1, x2 = 0,1
y, h, col = 4, 1, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
Out[107]:
Text(0.5, 5, '$p = 0.000032$')
In [108]:
%matplotlib notebook

fig = plt.figure()
bb = plt.bar(range(3), np.nanmean(leftover_inlen_minlen2_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(leftover_inlen_minlen2_p,axis = 0)/np.sqrt(leftover_inlen_minlen2_p.shape[0]))
plt.xticks(range(3), ['totally \ndifferent','len=2','len>=3'])
plt.xlabel('leftover difference between cities at the end af and bf undo')




#statistics
x1, x2 = 0,1
y, h, col = 1, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)

#statistics
x1, x2 = 1,2
y, h, col =  1, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p23), ha='center', va='bottom', color=col, fontsize = 8)
fig.savefig(out_dir + 'proportion_leftover_different_overlaps_bylen_len2.png', dpi=600, bbox_inches='tight')
In [109]:
RT_inlen_minlen2 = RT_inlen.copy()
RT_inlen_minlen2 = RT_inlen_minlen2[:,np.setdiff1d(range(RT_inlen_minlen2.shape[1]),1)]

RT_minlen2 = RT_inlen_minlen2.copy()
RT_minlen2 = np.array([RT_minlen2[:,0], np.sum(RT_minlen2[:,1:],axis=1) ]).transpose()

RT_minlen2_p = np.divide(RT_minlen2, np.sum(overlap_seq_inorder_minlen2,axis=1).reshape(-1,1))
RT_inlen_minlen2_p = np.divide(RT_inlen_minlen2, np.sum(overlap_seq_inorder_inlen_minlen2,axis=1).reshape(-1,1))

RT_minlen2_p /= 1000
RT_inlen_minlen2_p /= 1000
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide
  import sys
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:8: RuntimeWarning: invalid value encountered in true_divide
  
In [110]:
print(np.nanmean(RT_minlen2_p,axis=0))
print(np.nanmean(RT_inlen_minlen2_p,axis=0))
[0.12435565 1.2240461 ]
[0.12435565 0.52759115 0.69645495]
In [111]:
stat12, p12 = wilcoxon(RT_minlen2_p[:,0],RT_minlen2_p[:,1])
print(stat12)
print(p12)
229.0
2.917295855137348e-15
In [112]:
%matplotlib notebook

mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(6,4.5))
bb = plt.bar(range(2), np.nanmean(RT_minlen2_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(RT_minlen2_p,axis = 0)/np.sqrt(RT_minlen2_p.shape[0]))
plt.xticks([0,1], ['totally different','has some overlaps'])
plt.xlabel('RT difference between cities at the end af and bf undo')


x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.2, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
Out[112]:
Text(0.5, 1.4440460968260245, '$p = 0.000000$')
In [113]:
stat12, p12 = wilcoxon(RT_inlen_minlen2_p[:,0],RT_inlen_minlen2_p[:,1])
stat23, p23 = wilcoxon(RT_inlen_minlen2_p[:,2],RT_inlen_minlen2_p[:,1])
print(stat12)
print(p12)
print(p23)
705.0
3.905682059423199e-10
2.3376789112987052e-06
In [114]:
%matplotlib notebook

plt.figure()
bb = plt.bar(range(3), np.nanmean(RT_inlen_minlen2_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(RT_inlen_minlen2_p,axis = 0)/np.sqrt(RT_inlen_minlen2_p.shape[0]))
plt.xticks(range(3), ['totally \ndifferent','len=2','len>=3'])
plt.xlabel('leftover difference between cities at the end af and bf undo')




#statistics
x1, x2 = 0,1
y, h, col = 1, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)

#statistics
x1, x2 = 1,2
y, h, col =  1, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p23), ha='center', va='bottom', color=col, fontsize = 8)
Out[114]:
Text(1.5, 1.02, '$p = 0.000002$')

Zigzagness¶

In [115]:
import copy
def get_tortuosity(pathList, pzi = 15):
    def cal_dist(xy1, xy2):
        return np.sqrt(np.sum((np.array(xy1)-np.array(xy2))**2))

    pathList_ = copy.deepcopy(pathList)

    ### for the def
    import json
    with open('./util/basicMap.json','rb') as f:
        basic_map = json.load(f)
    tortuosity = []
    ###

    map_pz = basic_map[pzi]

    import numpy as np
    for _ in range(len(pathList_)):
        path = pathList_.pop(0)
        if len(path)>1:
            ct_init = path.pop(0)
#             print(path)
#             print('init{}_tgt{}'.format(ct_init, path[-1]))
#             print('map_init{}_map_tgt{}'.format(map_pz['xy'][ct_init], map_pz['xy'][path[-1]]))
            d_straight = cal_dist(map_pz['xy'][ct_init], map_pz['xy'][path[-1]])
            d_zigzag = 0
            for _ in range(len(path)):
                ct_tgt = path.pop(0)
                d_zigzag += cal_dist(map_pz['xy'][ct_init], map_pz['xy'][ct_tgt])
                ct_init = ct_tgt
            tortuosity.append([d_zigzag, d_straight])
        else:
            tortuosity.append([np.nan, np.nan])
    return np.array(tortuosity)

import difflib

def get_overlap(s1, s2):
    s = difflib.SequenceMatcher(None, s1, s2)
    pos_a, pos_b, size = s.find_longest_match(0, len(s1), 0, len(s2)) 
    return s1[pos_a:pos_a+size]
 
def get_overlaplist(l1,l2,minlen=1):
    matched_seq = []
    for i1 in range(len(l1)):
#         print('l1:{}'.format(i1))
        matches = [i2 for i2 in range(len(l2)) if l2[i2] == l1[i1]]
#         print(matches)
        for m in matches:
            matched_seq_ = []
            i_a = 0
            while ((i1+i_a)<len(l1)) and ((m+i_a)< len(l2)):
                if l1[i1+i_a]==l2[m+i_a]:
                    matched_seq_.append(l1[i1+i_a])
                    i_a += 1
                else:
                    break
            matched_seq.append(matched_seq_)
            
    idx = np.argsort([len(seq) for seq in matched_seq])[::-1]
#     print(idx)

    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
    matched_seq = seqs_.copy()
    idx = np.argsort([len(seq) for seq in matched_seq])
    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
                
    seqs = []
    for seq in seqs_:
#         print(len(seq))
        if not len(seq)<minlen:
            seqs.append(seq)
            
    return seqs

def trimmer(matched_seq,minlen=1):
    idx = np.argsort([len(seq) for seq in matched_seq])[::-1]
#     print(idx)

    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
    matched_seq = seqs_.copy()
    idx = np.argsort([len(seq) for seq in matched_seq])
    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
                
    seqs = []
    for seq in seqs_:
#         print(len(seq))
        if not len(seq)<minlen:
            seqs.append(seq)
            
    return seqs
In [116]:
undo_zigzag = []
undo_zigzag_diff =[]
undo_zigzag_diff_inlen =[]

overlap_seq_inorder = []
overlap_seq_inorder_inlen=[]

length_seq = []

corr_ = []

for sub in range(100):
    dat_sbj  = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
    undo_zigzag_puzzle = []
    undo_zigzag_diff_for_puzzle = []
    undo_zigzag_diff_inlen_for_puzzle = []
    overlap_seq_inorder_for_puzzle = []
    overlap_seq_inorder_inlen_for_puzzle=[]
    corr_puzzle_ =[]
    for pzi in np.unique(sc_data_choice_level['puzzleID']):
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()        
 
        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"]==1].index
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index

        submit_idx =  pd.Int64Index([len(dat_sbj_pzi)-1])
        
        
        if len(firstUndo_idx)>0:
            if len(firstUndo_idx)==1: #
                seq_bf = dat_sbj_pzi["path"][firstUndo_idx-1]
                seq_af = dat_sbj_pzi["path"][submit_idx]
                seq_im = dat_sbj_pzi["path"][lastUndo_idx]
                currmas_bf = dat_sbj_pzi["currMas"][lastUndo_idx-1]
                currmas_af = dat_sbj_pzi["currMas"][lastUndo_idx+1]
            else:
                seq_bf = dat_sbj_pzi["path"][firstUndo_idx-1]
                t_idx = (firstUndo_idx[1:]-1).to_list()
                t_idx.append(submit_idx.item())
                seq_af = dat_sbj_pzi["path"][t_idx]
                seq_im = dat_sbj_pzi["path"][lastUndo_idx]
                currmas_bf = dat_sbj_pzi["currMas"][lastUndo_idx-1]
                currmas_af = dat_sbj_pzi["currMas"][lastUndo_idx+1]

            seq_im = seq_im.reset_index()
            pathStr_im = [seq_im.loc[i].path.strip('[').strip(']') for i in range(len(seq_im))]
            pathList_im = [[int(i) for i in pathStr_im[j].split(', ')]  for j in range(len(pathStr_im))]
            pathStr_im = [" ".join([str(a) for a in pathList_im[j]]) for j in range(len(seq_im))]

            seq_bf = seq_bf.reset_index()
            pathStr_bf = [seq_bf.loc[i].path.strip('[').strip(']') for i in range(len(seq_bf))]
            pathList_bf = [[int(i) for i in pathStr_bf[j].split(', ')]  for j in range(len(pathStr_bf))]
            pathStr_bf = [" ".join([str(a) for a in pathList_bf[j]]) for j in range(len(seq_bf))]

            seq_af = seq_af.reset_index()
            pathStr_af = [seq_af.loc[i].path.strip('[').strip(']') for i in range(len(seq_af))]
            pathList_af = [[int(i) for i in pathStr_af[j].split(', ')]  for j in range(len(pathStr_af))]
            pathStr_af = [" ".join([str(a) for a in pathList_af[j]]) for j in range(len(seq_af))]

            pathList_bfim = [np.setdiff1d(np.array(pathList_bf[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_bf))]
            pathStr_bfim = [" ".join([str(a) for a in pathList_bfim[j]]) for j in range(len(pathList_im))]

            pathList_afim = [np.setdiff1d(np.array(pathList_af[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_af))]
            pathStr_afim = [" ".join([str(a) for a in pathList_afim[j]]) for j in range(len(pathList_im))]

            tor_bf = get_tortuosity(pathList_bfim, pzi)
            path_bf_undo = (tor_bf[:,0]/tor_bf[:,1])
            tor_af = get_tortuosity(pathList_afim, pzi)
            path_af_undo = (tor_af[:,0]/tor_af[:,1])
            
            zigzag_diff = np.array(path_af_undo) - np.array(path_bf_undo)
            currmas_diff = np.array(currmas_af) - np.array(currmas_bf)
            
            if len(zigzag_diff) != len(currmas_diff):
                print(zigzag_diff, currmas_diff)
            corr_puzzle_.append([zigzag_diff, currmas_diff])
            
            overlap_seq_2 = []
            zigzag_diff_2 = []
            seq_inorder_2 = []
            
            for i in range(len(seq_af)):
                temp = []
                for j in range(i+1):
            #         temp.append(get_overlaplist(pathList_afim[i], pathList_bfim[j]))
                    temptemp = get_overlaplist(pathList_afim[i], pathList_bfim[j])
                    if not len(temptemp)==0:
                        temp.extend(temptemp)

                temp = trimmer(temp)
                if len(temp)==0:
                    temp.append([])

                seq_inorder_2.append(temp)

            #     len_seq_inorder_2 = [len(s) for s in temp if len(s)!=0]
                len_seq_inorder_2 =[]
                tempzigd = []
                for s in temp:
#                     print('*')
                    if len(s)==0:
                        len_seq_inorder_2.append(0)
                    else:
                        len_seq_inorder_2.append(len(s))
                        
                    tempzigd.append(zigzag_diff[i])
                    
                        
                overlap_seq_2.extend(len_seq_inorder_2)
                zigzag_diff_2.extend(tempzigd)
            
            # I think it doesn't mean it choose different path, it means at least once the chosen city is different after undo
            if np.any(np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) != np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])):
                idxx = np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) != np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])
                undo_zigzag_puzzle.extend(np.sign(np.array(path_af_undo[idxx]) - np.array(path_bf_undo[idxx])))
                
                overlap_seq_inorder_for_puzzle.append([np.sum(np.array(overlap_seq_2)==0), np.sum(np.array(overlap_seq_2)!=0)])
                overlap_seq_inorder_inlen_for_puzzle.append([np.sum(np.array(overlap_seq_2)==0), np.sum(np.array(overlap_seq_2)==1), 
                                                             np.sum(np.array(overlap_seq_2)==2), np.sum(np.array(overlap_seq_2)>2),])

                length_seq.extend(overlap_seq_2)
                
                undo_zigzag_diff_for_puzzle.append([np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)==0]) , np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)!=0]) ] )
                undo_zigzag_diff_inlen_for_puzzle.append([np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)==0]), np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)==1]), 
                                                             np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)==2]), np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)>2]),])
                
            

#         else:
#             overlap_seq_inorder_for_puzzle.append([np.nan,np.nan])
#             overlap_seq_inorder_inlen_for_puzzle.append([np.nan,np.nan,np.nan,np.nan])
            
    overlap_seq_inorder_for_puzzle = np.array(overlap_seq_inorder_for_puzzle)
    temp = overlap_seq_inorder_for_puzzle.copy()
#     overlap_seq_inorder.append(np.nansum(temp,axis=0))
#     print('*'*10)
    if len(temp) == 0:
        overlap_seq_inorder.append(np.zeros((2)))
    else:
        overlap_seq_inorder.append(np.nansum(temp,axis=0))


    overlap_seq_inorder_inlen_for_puzzle=np.array(overlap_seq_inorder_inlen_for_puzzle)
    temp = overlap_seq_inorder_inlen_for_puzzle.copy()
    
    if len(temp) == 0:
        overlap_seq_inorder_inlen.append(np.zeros((4)))
    else:
        overlap_seq_inorder_inlen.append(np.nansum(temp,axis=0))
        
    undo_zigzag_puzzle =  np.array(undo_zigzag_puzzle)
#     undo_zigzag.append([np.sum(undo_zigzag_puzzle<0), np.sum(undo_zigzag_puzzle==0) ,np.sum(undo_zigzag_puzzle>0)])
    undo_zigzag.append([np.sum(undo_zigzag_puzzle<0) ,np.sum(undo_zigzag_puzzle>0)])
    


    undo_zigzag_diff_for_puzzle=np.array(undo_zigzag_diff_for_puzzle)
    temp = undo_zigzag_diff_for_puzzle.copy()
    if len(temp) == 0:
        undo_zigzag_diff.append(np.zeros((2)))
    else:
        undo_zigzag_diff.append(np.nansum(temp,axis=0))
                                
                                
                                
    undo_zigzag_diff_inlen_for_puzzle=np.array(undo_zigzag_diff_inlen_for_puzzle)
    temp = undo_zigzag_diff_inlen_for_puzzle.copy()
    
    if len(temp) == 0:
        undo_zigzag_diff_inlen.append(np.zeros((4)))
    else:
        undo_zigzag_diff_inlen.append(np.nansum(temp,axis=0))
    
    c_0 = []
    c_1 = []
    for c in corr_puzzle_:
        c_0.extend(c[0] )
        c_1.extend(c[1] )
    from scipy.stats import pearsonr
    index = np.intersect1d(np.where(~np.isnan(c_0))[0], np.where(~np.isnan(c_1))[0])
    try:
        r,p=pearsonr(np.array(c_0)[index], np.array(c_1)[index])
        corr_.append(r)
    except:
        ''
    
    
undo_zigzag = np.array(undo_zigzag)
                                
undo_zigzag_diff = np.array(undo_zigzag_diff)
undo_zigzag_diff_inlen = np.array(undo_zigzag_diff_inlen)

overlap_seq_inorder = np.array(overlap_seq_inorder)
overlap_seq_inorder_inlen = np.array(overlap_seq_inorder_inlen)
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/scipy/stats/stats.py:4023: PearsonRConstantInputWarning: An input array is constant; the correlation coefficient is not defined.
  warnings.warn(PearsonRConstantInputWarning())
In [117]:
np.nanmean(np.array(corr_)[np.where(~np.isnan(corr_))[0]])
Out[117]:
0.22923986038665958
In [118]:
data1=np.array(corr_)[np.where(~np.isnan(corr_))[0]]
In [119]:
ttest_ind(data1,np.zeros(data1.shape))
Out[119]:
Ttest_indResult(statistic=5.464790186633249, pvalue=2.0233842347730814e-07)
In [120]:
from scipy.stats import pearsonr

c_0 = []
c_1 = []
for c in corr_puzzle_:
    c_0.extend(c[0] )
    c_1.extend(c[1] )
In [121]:
overlap_seq_inorder_inlen_minlen2 = overlap_seq_inorder_inlen.copy()
overlap_seq_inorder_inlen_minlen2 = overlap_seq_inorder_inlen_minlen2[:,np.setdiff1d(range(overlap_seq_inorder_inlen_minlen2.shape[1]),1)]
overlap_seq_inorder_minlen2 = overlap_seq_inorder_inlen_minlen2.copy()
overlap_seq_inorder_minlen2 = np.array([overlap_seq_inorder_minlen2[:,0], np.sum(overlap_seq_inorder_minlen2[:,1:],axis=1) ]).transpose()

overlap_seq_inorder_p = overlap_seq_inorder/np.sum(overlap_seq_inorder,axis=1)[:,None]
overlap_seq_inorder_inlen_p = overlap_seq_inorder_inlen/np.sum(overlap_seq_inorder_inlen,axis=1)[:,None]


overlap_seq_inorder_minlen2_p = overlap_seq_inorder_minlen2/np.sum(overlap_seq_inorder_minlen2,axis=1)[:,None]
overlap_seq_inorder_inlen_minlen2_p = overlap_seq_inorder_inlen_minlen2/np.sum(overlap_seq_inorder_inlen_minlen2,axis=1)[:,None]
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:6: RuntimeWarning: invalid value encountered in true_divide
  
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide
  import sys
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in true_divide
  # Remove the CWD from sys.path while we load stuff.
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:11: RuntimeWarning: invalid value encountered in true_divide
  # This is added back by InteractiveShellApp.init_path()
In [122]:
undo_zigzag_diff_inlen_minlen2 = undo_zigzag_diff_inlen.copy()
undo_zigzag_diff_inlen_minlen2 = undo_zigzag_diff_inlen_minlen2[:,np.setdiff1d(range(undo_zigzag_diff_inlen_minlen2.shape[1]),1)]
undo_zigzag_diff_minlen2 = undo_zigzag_diff_inlen_minlen2.copy()
undo_zigzag_diff_minlen2 = np.array([undo_zigzag_diff_minlen2[:,0], np.sum(undo_zigzag_diff_minlen2[:,1:],axis=1) ]).transpose()

undo_zigzag_diff_minlen2_p = undo_zigzag_diff_minlen2/np.sum(undo_zigzag_diff_minlen2,axis=1)[:,None]
undo_zigzag_diff_inlen_minlen2_p = undo_zigzag_diff_inlen_minlen2/np.sum(undo_zigzag_diff_inlen_minlen2,axis=1)[:,None]
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:6: RuntimeWarning: invalid value encountered in true_divide
  
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide
  import sys
In [123]:
# exclude some never undoing subjects
undo_zigzag = undo_zigzag[np.where(np.sum(np.array(undo_zigzag),axis=1)!=0),:]
undo_zigzag = undo_zigzag.squeeze()
undo_zigzag_p = undo_zigzag/ np.sum(undo_zigzag,axis = 1)[:,None]
In [124]:
stat12, p12 = wilcoxon(undo_zigzag_diff_minlen2_p[:,0],undo_zigzag_diff_minlen2_p[:,1])
print(stat12)
print(p12)
666.0
1.6278452784027353e-10
In [125]:
%matplotlib notebook


mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
bb = plt.bar(range(2), np.nanmean(undo_zigzag_diff_minlen2_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(undo_zigzag_diff_minlen2_p,axis = 0)/np.sqrt(undo_zigzag_diff_minlen2_p.shape[0]))
plt.xticks([0,1], ['totally different','has some cities that overlaps'])
plt.xlabel('zigzagness difference between child cities of branching city af and bf undo')




#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.8, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)

fig.savefig(out_dir + 'proportion_zigznagness_different_overlaps_len2.png', dpi=600, bbox_inches='tight')

How zigzagness has changed due to overlapped seq¶

In [126]:
stat12, p12 = wilcoxon(undo_zigzag_diff_inlen_minlen2_p[:,0],undo_zigzag_diff_inlen_minlen2_p[:,1])
stat23, p23 = wilcoxon(undo_zigzag_diff_inlen_minlen2_p[:,1],undo_zigzag_diff_inlen_minlen2_p[:,2])
print(stat12)
print(p12)
print(stat23)
print(p23)
1132.0
2.76521429315202e-06
818.0
9.607948130467323e-08
In [127]:
%matplotlib notebook

mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(8,6))
bb = plt.bar(range(3), np.nanmean(undo_zigzag_diff_inlen_minlen2_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(undo_zigzag_diff_inlen_minlen2_p,axis = 0)/np.sqrt(undo_zigzag_diff_inlen_minlen2_p.shape[0]))
# plt.xticks(range(3), ['totally \ndifferent','len=2','len>=3'])
plt.xticks([0,1,1.5,2], ['totally \ndifferent','2','\nlength of the overlapped sequence','3+'])
plt.xlabel('zigzagness difference between child cities of branching city af and bf undo')





#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.5, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)

#statistics
x1, x2 = 1,2
y, h, col = np.max([bb[1].get_height(),bb[2].get_height()]) + 0.5, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p23), ha='center', va='bottom', color=col, fontsize = 8)
fig.savefig(out_dir + 'proportion_zigznagness_different_overlaps_bylen_len2.png', dpi=600, bbox_inches='tight')

Cagegorizing patterns¶

In [128]:
def get_overlap_cts(l1,l2):
    
    matched_seq = []
    for i1 in range(len(l1)):
        matches = [l2[i2] for i2 in range(len(l2)) if l2[i2] == l1[i1]]
        matched_seq.extend(matches)

    return matched_seq


def get_overlap(s1, s2):
    s = difflib.SequenceMatcher(None, s1, s2)
    pos_a, pos_b, size = s.find_longest_match(0, len(s1), 0, len(s2)) 
    return s1[pos_a:pos_a+size]
 
def get_overlaplist(l1,l2,minlen=1):
    matched_seq = []
    for i1 in range(len(l1)):
#         print('l1:{}'.format(i1))
        matches = [i2 for i2 in range(len(l2)) if l2[i2] == l1[i1]]
#         print(matches)
        for m in matches:
            matched_seq_ = []
            i_a = 0
            while ((i1+i_a)<len(l1)) and ((m+i_a)< len(l2)):
                if l1[i1+i_a]==l2[m+i_a]:
                    matched_seq_.append(l1[i1+i_a])
                    i_a += 1
                else:
                    break
            matched_seq.append(matched_seq_)
            
    idx = np.argsort([len(seq) for seq in matched_seq])[::-1]
#     print(idx)

    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
    matched_seq = seqs_.copy()
    idx = np.argsort([len(seq) for seq in matched_seq])
    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
                
    seqs = []
    for seq in seqs_:
#         print(len(seq))
        if not len(seq)<minlen:
            seqs.append(seq)
            
    return seqs

def trimmer(matched_seq,minlen=1):
    idx = np.argsort([len(seq) for seq in matched_seq])[::-1]
#     print(idx)

    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
    matched_seq = seqs_.copy()
    idx = np.argsort([len(seq) for seq in matched_seq])
    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
                
    seqs = []
    for seq in seqs_:
#         print(len(seq))
        if not len(seq)<minlen:
            seqs.append(seq)
            
    return seqs
In [129]:
['same', 'reordering', 'partial change', 'totally different', 
 'inserting new cities', 'removed some from prev seq']
patterns = []


overlap_cts = []
overlap_cts_inlen=[]
overlap_seq_inorder = []
overlap_seq_inorder_inlen=[]
currmas = []
currmas_inlen=[]
endmas = []
endmas_inlen=[]

length_seq = []

for sub in range(100):
    dat_sbj  = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
    overlap_cts_for_puzzle = []
    overlap_cts_inlen_for_puzzle=[]
    overlap_seq_inorder_for_puzzle = []
    overlap_seq_inorder_inlen_for_puzzle=[]
    currmas_for_puzzle = []
    currmas_inlen_for_puzzle=[]
    endmas_for_puzzle = []
    endmas_inlen_for_puzzle=[]
    
    patterns_for_puzzle = np.zeros((6))
    for pzi in np.unique(sc_data_choice_level['puzzleID']):
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()        

        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"]==1].index
        path_bf_undo = dat_sbj_pzi["currMas"][firstUndo_idx-1] # the mas of the state before undo

        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index
        path_af_undo = dat_sbj_pzi["currMas"][lastUndo_idx+1] # the mas of the state after undo
        
        submit_idx =  pd.Int64Index([len(dat_sbj_pzi)-1])
        
        if len(firstUndo_idx)>0:
            if len(firstUndo_idx)==1: #
                seq_bf = dat_sbj_pzi["path"][firstUndo_idx-1]
                seq_af = dat_sbj_pzi["path"][submit_idx]
                seq_im = dat_sbj_pzi["path"][lastUndo_idx]
                currmas_bf = dat_sbj_pzi["currMas"][lastUndo_idx-1]
                currmas_af = dat_sbj_pzi["currMas"][lastUndo_idx+1]

                endmas_bf = dat_sbj_pzi["currMas"][firstUndo_idx-1]
                endmas_af = dat_sbj_pzi["currMas"][submit_idx]

            else:
                seq_bf = dat_sbj_pzi["path"][firstUndo_idx-1]
                t_idx = (firstUndo_idx[1:]-1).to_list()
                t_idx.append(submit_idx.item())
                seq_af = dat_sbj_pzi["path"][t_idx]
                seq_im = dat_sbj_pzi["path"][lastUndo_idx]
                currmas_bf = dat_sbj_pzi["currMas"][lastUndo_idx-1]
                currmas_af = dat_sbj_pzi["currMas"][lastUndo_idx+1]

                endmas_bf = dat_sbj_pzi["currMas"][firstUndo_idx-1]
                endmas_af = dat_sbj_pzi["currMas"][t_idx]

            seq_im = seq_im.reset_index()
            pathStr_im = [seq_im.loc[i].path.strip('[').strip(']') for i in range(len(seq_im))]
            pathList_im = [[int(i) for i in pathStr_im[j].split(', ')]  for j in range(len(pathStr_im))]
            pathStr_im = [" ".join([str(a) for a in pathList_im[j]]) for j in range(len(seq_im))]

            seq_bf = seq_bf.reset_index()
            pathStr_bf = [seq_bf.loc[i].path.strip('[').strip(']') for i in range(len(seq_bf))]
            pathList_bf = [[int(i) for i in pathStr_bf[j].split(', ')]  for j in range(len(pathStr_bf))]
            pathStr_bf = [" ".join([str(a) for a in pathList_bf[j]]) for j in range(len(seq_bf))]

            seq_af = seq_af.reset_index()
            pathStr_af = [seq_af.loc[i].path.strip('[').strip(']') for i in range(len(seq_af))]
            pathList_af = [[int(i) for i in pathStr_af[j].split(', ')]  for j in range(len(pathStr_af))]
            pathStr_af = [" ".join([str(a) for a in pathList_af[j]]) for j in range(len(seq_af))]

            pathList_bfim = [np.setdiff1d(np.array(pathList_bf[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_bf))]
            pathStr_bfim = [" ".join([str(a) for a in pathList_bfim[j]]) for j in range(len(pathList_im))]

            pathList_afim = [np.setdiff1d(np.array(pathList_af[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_af))]
            pathStr_afim = [" ".join([str(a) for a in pathList_afim[j]]) for j in range(len(pathList_im))]


        #     seq_inorder = [get_overlap(pathStr_af[i], pathStr_bf[i]).strip('0 ').split(' ') for i in range(len(seq_af))]
        #     seq_inorder = [[] if i[0]=='' else [int(j) for j in i] for i in seq_inorder]

            cts = [get_overlap_cts(pathList_afim[i], pathList_bfim[i]) for i in range(len(seq_af))for j in range(i+1) ]
        #     seq_inorder = [[] if i[0]=='' else [int(j) for j in i] for i in seq_inorder]
            
            len_af = []
            for i in range(len(seq_af)):

                temp = []
                temp2 = []
                for j in range(i+1):
                    temptemp = get_overlap_cts(pathList_afim[i], pathList_bfim[j])
                    temptemp2 = get_overlaplist(pathList_afim[i], pathList_bfim[j])
                    temp_list2 = []
                    for t_ in temptemp2:
                        temp_list2.extend(t_)
                    len_af.append([len(temp_list2) ,np.mean([len(tt) for tt in temptemp2])
                                   ,len(pathList_afim[i]) ,len(pathList_bfim[j])])


                    # same length
                    if len_af[-1][2]==(len_af[-1][3]):
                        # exactly same sequence
                        if len_af[-1][2] == len_af[-1][1]: 
#                             print('same')
                            patterns_for_puzzle[0]+=1
                        # not exactly same sequence,
                        else:
                            # but has same cities in the squence
                            if len_af[-1][2] == len_af[-1][0]:# same 
#                                 preint('reordering')
                                patterns_for_puzzle[1]+=1
                            # different cities
                            else:
                                # no overlapping cities at all
                                if len_af[-1][0]==0:
#                                     print('totally different')
                                    patterns_for_puzzle[3]+=1
                                # has some overlapping cities. 
                                else:
#                                     print('partial overlap')
                                    patterns_for_puzzle[2]+=1

                    else: # different length
                        # path before is exactly composed of the overlapping cities 
                        # which means that you inserted some in the path after undo.
                        if len_af[-1][3] == len_af[-1][0]:
#                             print('inserting new cities')
                            patterns_for_puzzle[4]+=1
                        # path after undo is exactly composed of the overlapping cities 
                        # which means removing one city from previous.
                        elif len_af[-1][2]==len_af[-1][0]:
#                             print('removed some from prev seq')
                            patterns_for_puzzle[5]+=1
                        else:
                            # if there is not in common
                            if len_af[-1][0]==0:
#                                 print('totally different')
                                patterns_for_puzzle[3]+=1
                            
                            else:
#                                 print('partial overlap')
                                patterns_for_puzzle[2]+=1
            
# #                                 print('used some of the sequences')
#                                 patterns_for_puzzle[6]+=1
            
    patterns.append(patterns_for_puzzle)

patterns = np.array(patterns)
        
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/numpy/core/_methods.py:189: RuntimeWarning: invalid value encountered in double_scalars
  ret = ret.dtype.type(ret / rcount)
In [130]:
patterns_p = np.divide(patterns, np.sum(patterns,axis=1).reshape(-1,1))
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:1: RuntimeWarning: invalid value encountered in true_divide
  """Entry point for launching an IPython kernel.
In [131]:
np.nanmean(patterns_p,axis=0)
Out[131]:
array([0.06960946, 0.        , 0.32733827, 0.32092617, 0.20247063,
       0.07965548])
In [132]:
%matplotlib notebook
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(10,6))
plt.bar(range(6), np.nanmean(patterns_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(patterns_p,axis = 0)/np.sqrt(patterns_p.shape[0]))
plt.xticks(range(6), ['same', 'reordering\n(exactly same cities\nbut different order)', 'partial\noverlap',
                      'totally\ndifferent\n(no overlap at all)', 
 'inserting\nnew cities', 'removed some\nfrom prev seq'])
Out[132]:
([<matplotlib.axis.XTick at 0x7fed4a71d7d0>,
  <matplotlib.axis.XTick at 0x7fed4a71d390>,
  <matplotlib.axis.XTick at 0x7fed2bbf7c50>,
  <matplotlib.axis.XTick at 0x7fed4a71b290>,
  <matplotlib.axis.XTick at 0x7fed4a71b3d0>,
  <matplotlib.axis.XTick at 0x7fed08ab5a50>],
 [Text(0, 0, 'same'),
  Text(1, 0, 'reordering\n(exactly same cities\nbut different order)'),
  Text(2, 0, 'partial\noverlap'),
  Text(3, 0, 'totally\ndifferent\n(no overlap at all)'),
  Text(4, 0, 'inserting\nnew cities'),
  Text(5, 0, 'removed some\nfrom prev seq')])

Condition-wise difference in the same puzzle¶

Zigzagness¶

In [133]:
import copy
def get_tortuosity(pathList, pzi = 15):
    def cal_dist(xy1, xy2):
        return np.sqrt(np.sum((np.array(xy1)-np.array(xy2))**2))

    pathList_ = copy.deepcopy(pathList)

    ### for the def
    import json
    with open('./util/basicMap.json','rb') as f:
        basic_map = json.load(f)
    tortuosity = []
    ###

    map_pz = basic_map[pzi]

    import numpy as np
    for _ in range(len(pathList_)):
        path = pathList_.pop(0)
        if len(path)>1:
            ct_init = path.pop(0)
#             print(path)
#             print('init{}_tgt{}'.format(ct_init, path[-1]))
#             print('map_init{}_map_tgt{}'.format(map_pz['xy'][ct_init], map_pz['xy'][path[-1]]))
            d_straight = cal_dist(map_pz['xy'][ct_init], map_pz['xy'][path[-1]])
            d_zigzag = 0
            for _ in range(len(path)):
                ct_tgt = path.pop(0)
                d_zigzag += cal_dist(map_pz['xy'][ct_init], map_pz['xy'][ct_tgt])
                ct_init = ct_tgt
            tortuosity.append([d_zigzag, d_straight])
        else:
            tortuosity.append([np.nan, np.nan])
    return np.array(tortuosity)

import difflib

def get_overlap(s1, s2):
    s = difflib.SequenceMatcher(None, s1, s2)
    pos_a, pos_b, size = s.find_longest_match(0, len(s1), 0, len(s2)) 
    return s1[pos_a:pos_a+size]
 
def get_overlaplist(l1,l2,minlen=1):
    matched_seq = []
    for i1 in range(len(l1)):
#         print('l1:{}'.format(i1))
        matches = [i2 for i2 in range(len(l2)) if l2[i2] == l1[i1]]
#         print(matches)
        for m in matches:
            matched_seq_ = []
            i_a = 0
            while ((i1+i_a)<len(l1)) and ((m+i_a)< len(l2)):
                if l1[i1+i_a]==l2[m+i_a]:
                    matched_seq_.append(l1[i1+i_a])
                    i_a += 1
                else:
                    break
            matched_seq.append(matched_seq_)
            
    idx = np.argsort([len(seq) for seq in matched_seq])[::-1]
#     print(idx)

    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
    matched_seq = seqs_.copy()
    idx = np.argsort([len(seq) for seq in matched_seq])
    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
                
    seqs = []
    for seq in seqs_:
#         print(len(seq))
        if not len(seq)<minlen:
            seqs.append(seq)
            
    return seqs

def trimmer(matched_seq,minlen=1):
    idx = np.argsort([len(seq) for seq in matched_seq])[::-1]
#     print(idx)

    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
    matched_seq = seqs_.copy()
    idx = np.argsort([len(seq) for seq in matched_seq])
    seqs_ = []
    for i in idx:
        if len(seqs_) == 0:
            seqs_.append(matched_seq[i])
        else:
            if not np.any([np.all(np.isin(matched_seq[i],seq)) for seq in seqs_]):
                seqs_.append(matched_seq[i])
                
    seqs = []
    for seq in seqs_:
#         print(len(seq))
        if not len(seq)<minlen:
            seqs.append(seq)
            
    return seqs
In [134]:
undo_level = data_choice_level[data_choice_level['condition']==1].copy().reset_index()
basic_level = data_choice_level[data_choice_level['condition']==0].copy().reset_index()

undo_zigzag = []
basic_zigzag = []
    
    
zigzag_undo_all = []
zigzag_basic_all = []

zigzag_1st_lst = []


for sub in range(100):
    
    #     dat_sbj  = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
    dat_sbj_undo  = undo_level[undo_level['subjects']==sub].sort_values(["puzzleID","index"])
    dat_sbj_basic  = basic_level[basic_level['subjects']==sub].sort_values(["puzzleID","index"])

    undo_zigzag_puzzle = []
    basic_zigzag_puzzle = []
    
    zigzag_1st_lst_puzzle = []
    
    for pzi in np.unique(sc_data_choice_level['puzzleID']):
        
        dat_sbj_undo_pzi = dat_sbj_undo[dat_sbj_undo['puzzleID'] == pzi].reset_index()        
        dat_sbj_basic_pzi = dat_sbj_basic[dat_sbj_basic['puzzleID'] == pzi].reset_index()        

        firstUndo_idx = dat_sbj_undo_pzi[dat_sbj_undo_pzi["firstUndo"]==1].index
        lastUndo_idx = dat_sbj_undo_pzi[dat_sbj_undo_pzi["lastUndo"]==1].index

        submit_idx =  pd.Int64Index([len(dat_sbj_undo_pzi)-1])


        if len(firstUndo_idx)>0:
            if len(firstUndo_idx)==1: #
                seq_bf = dat_sbj_undo_pzi["path"][firstUndo_idx-1]
                seq_af = dat_sbj_undo_pzi["path"][submit_idx]
                seq_im = dat_sbj_undo_pzi["path"][lastUndo_idx]
            else:
                seq_bf = dat_sbj_undo_pzi["path"][firstUndo_idx-1]
                t_idx = (firstUndo_idx[1:]-1).to_list()
                t_idx.append(submit_idx.item())
                seq_af = dat_sbj_undo_pzi["path"][t_idx]
                seq_im = dat_sbj_undo_pzi["path"][lastUndo_idx]

            seq_im = seq_im.reset_index()
            pathStr_im = [seq_im.loc[i].path.strip('[').strip(']') for i in range(len(seq_im))]
            pathList_im = [[int(i) for i in pathStr_im[j].split(', ')]  for j in range(len(pathStr_im))]
            pathStr_im = [" ".join([str(a) for a in pathList_im[j]]) for j in range(len(seq_im))]

            seq_bf = seq_bf.reset_index()
            pathStr_bf = [seq_bf.loc[i].path.strip('[').strip(']') for i in range(len(seq_bf))]
            pathList_bf = [[int(i) for i in pathStr_bf[j].split(', ')]  for j in range(len(pathStr_bf))]
            pathStr_bf = [" ".join([str(a) for a in pathList_bf[j]]) for j in range(len(seq_bf))]

            seq_af = seq_af.reset_index()
            pathStr_af = [seq_af.loc[i].path.strip('[').strip(']') for i in range(len(seq_af))]
            pathList_af = [[int(i) for i in pathStr_af[j].split(', ')]  for j in range(len(pathStr_af))]
            pathStr_af = [" ".join([str(a) for a in pathList_af[j]]) for j in range(len(seq_af))]

            pathList_bfim = [np.setdiff1d(np.array(pathList_bf[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_bf))]
            pathStr_bfim = [" ".join([str(a) for a in pathList_bfim[j]]) for j in range(len(pathList_im))]

            pathList_afim = [np.setdiff1d(np.array(pathList_af[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_af))]
            pathStr_afim = [" ".join([str(a) for a in pathList_afim[j]]) for j in range(len(pathList_im))]

            seq_basic = dat_sbj_basic_pzi['path'].reset_index()
            pathStr_basic = [seq_basic.loc[i].path.strip('[').strip(']') for i in range(len(seq_basic))]
            pathList_basic = [[int(i) for i in pathStr_basic[j].split(', ')]  for j in range(len(pathStr_basic))]

            tor_basic = get_tortuosity([pathList_basic[-1]],pzi)
            path_basic = (tor_basic[:,0]/tor_basic[:,1])
            zigzag_basic = path_basic.tolist()

            tor_bf = get_tortuosity(pathList_bf, pzi)
            path_bf_undo = (tor_bf[:,0]/tor_bf[:,1])
            tor_af = get_tortuosity(pathList_af, pzi)
            path_af_undo = (tor_af[:,0]/tor_af[:,1])

            zigzag_undo = [*path_af_undo.tolist(), path_bf_undo[-1]]

            zz = []
            for count, z in enumerate(zigzag_undo):
                zz.append([count,z])
                
            zigzag_undo_all.extend(zz)
            zigzag_basic_all.extend(zigzag_basic)
            
            
        
#         undo_zigzag_puzzle.append(np.mean(zigzag_undo[0]))
        undo_zigzag_puzzle.append(np.mean(zigzag_undo))
        basic_zigzag_puzzle.append(zigzag_basic[0])
        zigzag_1st_lst_puzzle.append([zigzag_basic[0], zigzag_undo[0], np.mean(zigzag_undo), zigzag_undo[-1]])
                
                
    undo_zigzag_puzzle = np.array(undo_zigzag_puzzle)
    basic_zigzag_puzzle = np.array(basic_zigzag_puzzle)
    zigzag_1st_lst_puzzle = np.array(zigzag_1st_lst_puzzle)
    
    undo_zigzag.append(undo_zigzag_puzzle)
    basic_zigzag.append(basic_zigzag_puzzle)
    zigzag_1st_lst.append(zigzag_1st_lst_puzzle)
    
    
    
    
    
undo_zigzag = np.array(undo_zigzag) 
basic_zigzag = np.array(basic_zigzag)
zigzag_1st_lst = np.array(zigzag_1st_lst)
                                
In [135]:
bu=np.array([np.mean(basic_zigzag,axis=1),np.mean(undo_zigzag,axis=1)])
In [136]:
zigzag_1st_lst_mean = np.mean(zigzag_1st_lst,axis=1)
In [137]:
stat,p= ttest_ind(np.mean(basic_zigzag,axis=1),np.mean(undo_zigzag,axis=1))
print(p)
stat,p= ttest_ind(zigzag_1st_lst_mean[:,0],zigzag_1st_lst_mean[:,2])
print(p)
0.8998599339579605
0.8998599339579652
In [138]:
stat,p= wilcoxon(np.mean(basic_zigzag,axis=1),np.mean(undo_zigzag,axis=1))
print(p)
stat,p= wilcoxon(zigzag_1st_lst_mean[:,0],zigzag_1st_lst_mean[:,2])
print(p)
0.7010287235625708
0.7010287235625708
In [139]:
%matplotlib inline

mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(10,4.5))
bb = plt.bar(range(4), np.nanmean(zigzag_1st_lst_mean,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(zigzag_1st_lst_mean,axis = 0)/np.sqrt(zigzag_1st_lst_mean.shape[0]))
plt.xticks(range(4), ['Path \nin without undo','The first path','Avg. of paths\nin with undo','The last path'])
plt.ylabel('tortuosity')
Out[139]:
Text(0, 0.5, 'tortuosity')
In [140]:
%matplotlib inline

mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
bb = plt.bar(range(2), np.nanmean(zigzag_1st_lst_mean[:,[0,2]],axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(zigzag_1st_lst_mean[:,[0,2]],axis = 0)/np.sqrt(zigzag_1st_lst_mean.shape[0]))
plt.xticks(range(2), ['Paths \nin without undo','Paths \nin with undo'])
plt.ylabel('tortuosity')
plt.title('No difference!')
Out[140]:
Text(0.5, 1.0, 'No difference!')

Overlapped cities not sequences (to be removed)¶

overlaps from all of the prev seqs¶

In [141]:
import difflib

def get_overlaplist(l1,l2):
    
    matched_seq = []
    for i1 in range(len(l1)):
        matches = [l2[i2] for i2 in range(len(l2)) if l2[i2] == l1[i1]]
        matched_seq.extend(matches)

    return matched_seq
In [142]:
overlap_seq_inorder = []
overlap_seq_inorder_inlen=[]

for sub in range(100):
    dat_sbj  = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
    overlap_seq_inorder_for_puzzle = []
    overlap_seq_inorder_inlen_for_puzzle=[]
    for pzi in np.unique(sc_data_choice_level['puzzleID']):
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()        

        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"]==1].index
        path_bf_undo = dat_sbj_pzi["currMas"][firstUndo_idx-1] # the mas of the state before undo

        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index
        path_af_undo = dat_sbj_pzi["currMas"][lastUndo_idx+1] # the mas of the state after undo
        
        submit_idx =  pd.Int64Index([len(dat_sbj_pzi)-1])


        if len(firstUndo_idx)>0:
            if len(firstUndo_idx)==1: #
                seq_bf = dat_sbj_pzi["path"][firstUndo_idx-1]
                seq_af = dat_sbj_pzi["path"][submit_idx]
                seq_im = dat_sbj_pzi["path"][lastUndo_idx]

            else:
                seq_bf = dat_sbj_pzi["path"][firstUndo_idx-1]
                t_idx = (firstUndo_idx[1:]-1).to_list()
                t_idx.append(submit_idx.item())
                seq_af = dat_sbj_pzi["path"][t_idx]
                seq_im = dat_sbj_pzi["path"][lastUndo_idx]

            seq_im = seq_im.reset_index()
            pathStr_im = [seq_im.loc[i].path.strip('[').strip(']') for i in range(len(seq_im))]
            pathList_im = [[int(i) for i in pathStr_im[j].split(', ')]  for j in range(len(pathStr_im))]
            pathStr_im = [" ".join([str(a) for a in pathList_im[j]]) for j in range(len(seq_im))]

            seq_bf = seq_bf.reset_index()
            pathStr_bf = [seq_bf.loc[i].path.strip('[').strip(']') for i in range(len(seq_bf))]
            pathList_bf = [[int(i) for i in pathStr_bf[j].split(', ')]  for j in range(len(pathStr_bf))]
            pathStr_bf = [" ".join([str(a) for a in pathList_bf[j]]) for j in range(len(seq_bf))]

            seq_af = seq_af.reset_index()
            pathStr_af = [seq_af.loc[i].path.strip('[').strip(']') for i in range(len(seq_af))]
            pathList_af = [[int(i) for i in pathStr_af[j].split(', ')]  for j in range(len(pathStr_af))]
            pathStr_af = [" ".join([str(a) for a in pathList_af[j]]) for j in range(len(seq_af))]

            pathList_bfim = [np.setdiff1d(np.array(pathList_bf[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_bf))]
            pathStr_bfim = [" ".join([str(a) for a in pathList_bfim[j]]) for j in range(len(pathList_im))]

            pathList_afim = [np.setdiff1d(np.array(pathList_af[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_af))]
            pathStr_afim = [" ".join([str(a) for a in pathList_afim[j]]) for j in range(len(pathList_im))]


        #     seq_inorder = [get_overlap(pathStr_af[i], pathStr_bf[i]).strip('0 ').split(' ') for i in range(len(seq_af))]
        #     seq_inorder = [[] if i[0]=='' else [int(j) for j in i] for i in seq_inorder]

            seq_inorder = [get_overlaplist(pathList_afim[i], pathList_bfim[j]) for i in range(len(seq_af))for j in range(i+1) ]
        #     seq_inorder = [[] if i[0]=='' else [int(j) for j in i] for i in seq_inorder]


            len_seq_inorder = [len(s) for s in seq_inorder]

            # overlap_seq_inorder_for_puzzle.append(len_seq_inorder)
            overlap_seq_inorder_for_puzzle.append([np.sum(np.array(len_seq_inorder)==0), np.sum(np.array(len_seq_inorder)!=0)])
            overlap_seq_inorder_inlen_for_puzzle.append([np.sum(np.array(len_seq_inorder)==0), np.sum(np.array(len_seq_inorder)==1), 
                                                         np.sum(np.array(len_seq_inorder)==2), np.sum(np.array(len_seq_inorder)==3),
                                                        np.sum(np.array(len_seq_inorder)==4), np.sum(np.array(len_seq_inorder)>4),])
            
        else:
            overlap_seq_inorder_for_puzzle.append([np.nan,np.nan])
            overlap_seq_inorder_inlen_for_puzzle.append([np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,])
    overlap_seq_inorder_for_puzzle = np.array(overlap_seq_inorder_for_puzzle)
    temp = overlap_seq_inorder_for_puzzle.copy()
    overlap_seq_inorder.append(np.nansum(temp,axis=0))

    overlap_seq_inorder_inlen_for_puzzle=np.array(overlap_seq_inorder_inlen_for_puzzle)
    temp = overlap_seq_inorder_inlen_for_puzzle.copy()
    overlap_seq_inorder_inlen.append(np.nansum(temp,axis=0))
overlap_seq_inorder = np.array(overlap_seq_inorder)
overlap_seq_inorder_inlen = np.array(overlap_seq_inorder_inlen)
In [143]:
overlap_seq_inorder_inlen_minlen2 = overlap_seq_inorder_inlen.copy()
overlap_seq_inorder_inlen_minlen2 = overlap_seq_inorder_inlen_minlen2[:,np.setdiff1d(range(overlap_seq_inorder_inlen_minlen2.shape[1]),1)]
overlap_seq_inorder_minlen2 = overlap_seq_inorder_inlen_minlen2.copy()
overlap_seq_inorder_minlen2 = np.array([overlap_seq_inorder_minlen2[:,0], np.sum(overlap_seq_inorder_minlen2[:,1:],axis=1) ]).transpose()

overlap_seq_inorder_p = overlap_seq_inorder/np.sum(overlap_seq_inorder,axis=1)[:,None]
overlap_seq_inorder_inlen_p = overlap_seq_inorder_inlen/np.sum(overlap_seq_inorder_inlen,axis=1)[:,None]


overlap_seq_inorder_minlen2_p = overlap_seq_inorder_minlen2/np.sum(overlap_seq_inorder_minlen2,axis=1)[:,None]
overlap_seq_inorder_inlen_minlen2_p = overlap_seq_inorder_inlen_minlen2/np.sum(overlap_seq_inorder_inlen_minlen2,axis=1)[:,None]
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:6: RuntimeWarning: invalid value encountered in true_divide
  
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide
  import sys
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in true_divide
  # Remove the CWD from sys.path while we load stuff.
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:11: RuntimeWarning: invalid value encountered in true_divide
  # This is added back by InteractiveShellApp.init_path()
In [144]:
print(np.nanmean(overlap_seq_inorder_p,axis=0))
print(np.nanmean(overlap_seq_inorder_inlen_p,axis=0))
print(np.nanmean(overlap_seq_inorder_minlen2_p,axis=0))
print(np.nanmean(overlap_seq_inorder_inlen_minlen2_p,axis=0))
[0.35854577 0.64145423]
[0.35854577 0.21940371 0.08219769 0.08340843 0.09223434 0.16421005]
[0.45599214 0.54400786]
[0.45599214 0.11219922 0.10755462 0.11707653 0.20717749]
In [145]:
%matplotlib notebook


mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(6,4.5))
plt.bar(range(2), np.nanmean(overlap_seq_inorder_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(overlap_seq_inorder_p,axis = 0)/np.sqrt(overlap_seq_inorder_p.shape[0]))
plt.xticks([0,1], ['totally different','has some cities that overlaps'])
plt.xlabel('comparison path before and after undoing')
Out[145]:
Text(0.5, 0, 'comparison path before and after undoing')
In [146]:
%matplotlib notebook

plt.figure()
plt.bar(range(6), np.nanmean(overlap_seq_inorder_inlen_p,axis=0),
        color=[.7,.7,.7], edgecolor = 'k', 
        yerr=np.nanstd(overlap_seq_inorder_inlen_p,axis = 0)/np.sqrt(overlap_seq_inorder_inlen_p.shape[0]))
plt.xticks(range(6), ['totally \ndifferent','len=1','len=2','len=3'
                     ,'len=4','len>=5'])
plt.xlabel('comparison path before and after undoing')
Out[146]:
Text(0.5, 0, 'comparison path before and after undoing')
In [147]:
common_idx = np.intersect1d(np.where(~np.isnan(currmas_minlen2_p[:,0]))[0], np.where(~np.isnan(undo_zigzag_diff_minlen2_p[:,0]))[0])
from scipy.stats import pearsonr

print(pearsonr(currmas_minlen2_p[common_idx,0], undo_zigzag_diff_minlen2_p[common_idx,0]))
(0.10207177016241915, 0.38027585152785426)